Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/AArch64/AArch64.h6
-rw-r--r--lib/Target/AArch64/AArch64.td116
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp49
-rw-r--r--lib/Target/AArch64/AArch64AddressTypePromotion.cpp23
-rw-r--r--lib/Target/AArch64/AArch64CallLowering.cpp182
-rw-r--r--lib/Target/AArch64/AArch64CallLowering.h27
-rw-r--r--lib/Target/AArch64/AArch64ConditionOptimizer.cpp8
-rw-r--r--lib/Target/AArch64/AArch64ConditionalCompares.cpp11
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp65
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp102
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp45
-rw-r--r--lib/Target/AArch64/AArch64GenRegisterBankInfo.def419
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp47
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp338
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h21
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td69
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp391
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h39
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td49
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.cpp337
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.h49
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.cpp133
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.h7
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp73
-rw-r--r--lib/Target/AArch64/AArch64MacroFusion.cpp272
-rw-r--r--lib/Target/AArch64/AArch64MacroFusion.h29
-rw-r--r--lib/Target/AArch64/AArch64RedundantCopyElimination.cpp358
-rw-r--r--lib/Target/AArch64/AArch64RegisterBankInfo.cpp213
-rw-r--r--lib/Target/AArch64/AArch64RegisterBankInfo.h77
-rw-r--r--lib/Target/AArch64/AArch64RegisterBanks.td20
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp16
-rw-r--r--lib/Target/AArch64/AArch64SchedA53.td2
-rw-r--r--lib/Target/AArch64/AArch64SchedA57.td4
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkor.td106
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkorDetails.td523
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkorWriteRes.td361
-rw-r--r--lib/Target/AArch64/AArch64SchedKryoDetails.td62
-rw-r--r--lib/Target/AArch64/AArch64SchedM1.td3
-rw-r--r--lib/Target/AArch64/AArch64SchedThunderX.td352
-rw-r--r--lib/Target/AArch64/AArch64SchedThunderX2T99.td (renamed from lib/Target/AArch64/AArch64SchedVulcan.td)354
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp14
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp22
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h33
-rw-r--r--lib/Target/AArch64/AArch64SystemOperands.td134
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp36
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h2
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp56
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.h16
-rw-r--r--lib/Target/AArch64/AArch64VectorByElementOpt.cpp25
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp475
-rw-r--r--lib/Target/AArch64/CMakeLists.txt2
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp281
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h10
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp92
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp23
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp11
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp30
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h78
-rw-r--r--lib/Target/AMDGPU/AMDGPU.h95
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td108
-rw-r--r--lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp147
-rw-r--r--lib/Target/AMDGPU/AMDGPUAliasAnalysis.h102
-rw-r--r--lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp14
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp52
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp212
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.h123
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.cpp134
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.h10
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallingConv.td30
-rw-r--r--lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp120
-rw-r--r--lib/Target/AMDGPU/AMDGPUFrameLowering.cpp36
-rw-r--r--lib/Target/AMDGPU/AMDGPUFrameLowering.h3
-rw-r--r--lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def62
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp151
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp446
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h62
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.cpp3
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.h4
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.td74
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp424
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.h67
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructions.td138
-rw-r--r--lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp13
-rw-r--r--lib/Target/AMDGPU/AMDGPUIntrinsics.td17
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp62
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.h30
-rw-r--r--lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp160
-rw-r--r--lib/Target/AMDGPU/AMDGPUMCInstLower.cpp33
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.cpp18
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.h14
-rw-r--r--lib/Target/AMDGPU/AMDGPUPTNote.h5
-rw-r--r--lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp135
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp230
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.h65
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBanks.td16
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.h3
-rw-r--r--lib/Target/AMDGPU/AMDGPURuntimeMetadata.h193
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp253
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h293
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp230
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.h14
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp4
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetObjectFile.h1
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp241
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h24
-rw-r--r--lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp225
-rw-r--r--lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp27
-rw-r--r--lib/Target/AMDGPU/AMDILCFGStructurizer.cpp105
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp856
-rw-r--r--lib/Target/AMDGPU/BUFInstructions.td10
-rw-r--r--lib/Target/AMDGPU/CMakeLists.txt15
-rw-r--r--lib/Target/AMDGPU/DSInstructions.td174
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp39
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h2
-rw-r--r--lib/Target/AMDGPU/EvergreenInstructions.td119
-rw-r--r--lib/Target/AMDGPU/FLATInstructions.td12
-rw-r--r--lib/Target/AMDGPU/GCNHazardRecognizer.cpp109
-rw-r--r--lib/Target/AMDGPU/GCNHazardRecognizer.h3
-rw-r--r--lib/Target/AMDGPU/GCNIterativeScheduler.cpp528
-rw-r--r--lib/Target/AMDGPU/GCNIterativeScheduler.h118
-rw-r--r--lib/Target/AMDGPU/GCNMinRegStrategy.cpp266
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.cpp355
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.h170
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.cpp344
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.h62
-rw-r--r--lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp129
-rw-r--r--lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h10
-rw-r--r--lib/Target/AMDGPU/LLVMBuild.txt2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp19
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h422
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp625
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h99
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp14
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h6
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp408
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h26
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp72
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h32
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp30
-rw-r--r--lib/Target/AMDGPU/MIMGInstructions.td100
-rw-r--r--lib/Target/AMDGPU/Processors.td7
-rw-r--r--lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp48
-rw-r--r--lib/Target/AMDGPU/R600EmitClauseMarkers.cpp49
-rw-r--r--lib/Target/AMDGPU/R600FrameLowering.cpp36
-rw-r--r--lib/Target/AMDGPU/R600FrameLowering.h2
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp122
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.cpp64
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.h6
-rw-r--r--lib/Target/AMDGPU/R600Instructions.td10
-rw-r--r--lib/Target/AMDGPU/SIAnnotateControlFlow.cpp165
-rw-r--r--lib/Target/AMDGPU/SIDefines.h33
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp35
-rw-r--r--lib/Target/AMDGPU/SIFixVGPRCopies.cpp72
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp275
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.cpp138
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.h5
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp1780
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h44
-rw-r--r--lib/Target/AMDGPU/SIInsertSkips.cpp66
-rw-r--r--lib/Target/AMDGPU/SIInsertWaitcnts.cpp1863
-rw-r--r--lib/Target/AMDGPU/SIInsertWaits.cpp104
-rw-r--r--lib/Target/AMDGPU/SIInstrFormats.td19
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp455
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h86
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td332
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td294
-rw-r--r--lib/Target/AMDGPU/SIIntrinsics.td158
-rw-r--r--lib/Target/AMDGPU/SILoadStoreOptimizer.cpp376
-rw-r--r--lib/Target/AMDGPU/SILowerControlFlow.cpp67
-rw-r--r--lib/Target/AMDGPU/SILowerI1Copies.cpp24
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.cpp135
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h56
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp251
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.h44
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp713
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp414
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h106
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.td37
-rw-r--r--lib/Target/AMDGPU/SISchedule.td5
-rw-r--r--lib/Target/AMDGPU/SIShrinkInstructions.cpp8
-rw-r--r--lib/Target/AMDGPU/SMInstructions.td10
-rw-r--r--lib/Target/AMDGPU/SOPInstructions.td66
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp448
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h217
-rw-r--r--lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h2
-rw-r--r--lib/Target/AMDGPU/VOP1Instructions.td107
-rw-r--r--lib/Target/AMDGPU/VOP2Instructions.td40
-rw-r--r--lib/Target/AMDGPU/VOP3Instructions.td92
-rw-r--r--lib/Target/AMDGPU/VOP3PInstructions.td82
-rw-r--r--lib/Target/AMDGPU/VOPCInstructions.td8
-rw-r--r--lib/Target/AMDGPU/VOPInstructions.td80
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp24
-rw-r--r--lib/Target/ARM/ARM.h13
-rw-r--r--lib/Target/ARM/ARM.td64
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp15
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp832
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h52
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp43
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h17
-rw-r--r--lib/Target/ARM/ARMBasicBlockInfo.h21
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp323
-rw-r--r--lib/Target/ARM/ARMCallLowering.h10
-rw-r--r--lib/Target/ARM/ARMComputeBlockSize.cpp10
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp175
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp10
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h16
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp255
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp172
-rw-r--r--lib/Target/ARM/ARMFeatures.h4
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp471
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp278
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp1520
-rw-r--r--lib/Target/ARM/ARMISelLowering.h29
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td24
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp9
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td364
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td11
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td141
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td562
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td280
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp270
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.h13
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp40
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.h4
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp152
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp26
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.cpp9
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h69
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.cpp208
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.h13
-rw-r--r--lib/Target/ARM/ARMRegisterBanks.td14
-rw-r--r--lib/Target/ARM/ARMSchedule.td58
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td43
-rw-r--r--lib/Target/ARM/ARMScheduleR52.td104
-rw-r--r--lib/Target/ARM/ARMScheduleSwift.td52
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp39
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp37
-rw-r--r--lib/Target/ARM/ARMSubtarget.h108
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp101
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h20
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp11
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h12
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp20
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h12
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp387
-rw-r--r--lib/Target/ARM/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp139
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp9
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp68
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp52
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp188
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp53
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp30
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp13
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp8
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp21
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h21
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp15
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp141
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp47
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp21
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp71
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp90
-rw-r--r--lib/Target/ARM/ThumbRegisterInfo.cpp48
-rw-r--r--lib/Target/AVR/AVRAsmPrinter.cpp3
-rw-r--r--lib/Target/AVR/AVRExpandPseudoInsts.cpp17
-rw-r--r--lib/Target/AVR/AVRISelLowering.cpp31
-rw-r--r--lib/Target/AVR/AVRInstrInfo.td4
-rw-r--r--lib/Target/AVR/AVRInstrumentFunctions.cpp2
-rw-r--r--lib/Target/AVR/AVRMCInstLower.cpp2
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp2
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h2
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp2
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp1
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp1
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h2
-rw-r--r--lib/Target/BPF/BPFISelDAGToDAG.cpp6
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp18
-rw-r--r--lib/Target/BPF/BPFInstrInfo.td1
-rw-r--r--lib/Target/BPF/BPFMCInstLower.cpp10
-rw-r--r--lib/Target/BPF/BPFMCInstLower.h1
-rw-r--r--lib/Target/BPF/BPFRegisterInfo.cpp28
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp6
-rw-r--r--lib/Target/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp367
-rw-r--r--lib/Target/Hexagon/BitTracker.cpp16
-rw-r--r--lib/Target/Hexagon/BitTracker.h3
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt5
-rw-r--r--lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp1332
-rw-r--r--lib/Target/Hexagon/Hexagon.td40
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp238
-rw-r--r--lib/Target/Hexagon/HexagonBitSimplify.cpp389
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.cpp16
-rw-r--r--lib/Target/Hexagon/HexagonBlockRanges.cpp77
-rw-r--r--lib/Target/Hexagon/HexagonCallingConv.td35
-rw-r--r--lib/Target/Hexagon/HexagonCommonGEP.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonCopyToCombine.cpp28
-rw-r--r--lib/Target/Hexagon/HexagonDepArch.h10
-rw-r--r--lib/Target/Hexagon/HexagonDepArch.td19
-rw-r--r--lib/Target/Hexagon/HexagonDepDecoders.h64
-rw-r--r--lib/Target/Hexagon/HexagonDepITypes.h53
-rw-r--r--lib/Target/Hexagon/HexagonDepITypes.td48
-rw-r--r--lib/Target/Hexagon/HexagonDepInstrFormats.td4182
-rw-r--r--lib/Target/Hexagon/HexagonDepInstrInfo.td45573
-rw-r--r--lib/Target/Hexagon/HexagonDepMappings.td654
-rw-r--r--lib/Target/Hexagon/HexagonDepOperands.td132
-rw-r--r--lib/Target/Hexagon/HexagonEarlyIfConv.cpp205
-rw-r--r--lib/Target/Hexagon/HexagonExpandCondsets.cpp48
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp52
-rw-r--r--lib/Target/Hexagon/HexagonGenExtract.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonGenInsert.cpp13
-rw-r--r--lib/Target/Hexagon/HexagonGenMux.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp26
-rw-r--r--lib/Target/Hexagon/HexagonIICHVX.td102
-rw-r--r--lib/Target/Hexagon/HexagonIICScalar.td164
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp320
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp170
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h11
-rw-r--r--lib/Target/Hexagon/HexagonInstrAlias.td652
-rw-r--r--lib/Target/Hexagon/HexagonInstrEnc.td1019
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td169
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td22
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV60.td22
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp225
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td4799
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV3.td215
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td3301
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV5.td497
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV60.td2068
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoVector.td69
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td19
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV60.td2
-rw-r--r--lib/Target/Hexagon/HexagonIsetDx.td728
-rw-r--r--lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp2338
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td204
-rw-r--r--lib/Target/Hexagon/HexagonNewValueJump.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonOperands.td319
-rw-r--r--lib/Target/Hexagon/HexagonOptAddrMode.cpp70
-rw-r--r--lib/Target/Hexagon/HexagonPatterns.td147
-rw-r--r--lib/Target/Hexagon/HexagonPseudo.td537
-rw-r--r--lib/Target/Hexagon/HexagonRDFOpt.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp55
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h3
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td109
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td8
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td12
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV55.td11
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV60.td13
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV62.td129
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonSplitDouble.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp1
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h7
-rw-r--r--lib/Target/Hexagon/HexagonSystemInst.td134
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp28
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h1
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp63
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.h4
-rw-r--r--lib/Target/Hexagon/LLVMBuild.txt1
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp224
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h129
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp40
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp3
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp89
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp299
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h11
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp22
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp45
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp66
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h15
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp46
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp343
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h76
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp83
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h18
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp214
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h20
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp382
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h28
-rw-r--r--lib/Target/Hexagon/RDFCopy.cpp72
-rw-r--r--lib/Target/Hexagon/RDFCopy.h11
-rw-r--r--lib/Target/Hexagon/RDFDeadCode.cpp12
-rw-r--r--lib/Target/Hexagon/RDFGraph.cpp405
-rw-r--r--lib/Target/Hexagon/RDFGraph.h159
-rw-r--r--lib/Target/Hexagon/RDFLiveness.cpp291
-rw-r--r--lib/Target/Hexagon/RDFLiveness.h32
-rw-r--r--lib/Target/Hexagon/RDFRegisters.cpp368
-rw-r--r--lib/Target/Hexagon/RDFRegisters.h209
-rw-r--r--lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp2
-rw-r--r--lib/Target/Lanai/InstPrinter/CMakeLists.txt2
-rw-r--r--lib/Target/Lanai/InstPrinter/LLVMBuild.txt2
-rw-r--r--lib/Target/Lanai/LLVMBuild.txt2
-rw-r--r--lib/Target/Lanai/LanaiInstrInfo.cpp4
-rw-r--r--lib/Target/Lanai/LanaiMCInstLower.cpp2
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp4
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp6
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp4
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td8
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp4
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp87
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h6
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp2
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h10
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp766
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp87
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h39
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp74
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp68
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp8
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h15
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp60
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h27
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp8
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.h6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp16
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp10
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp40
-rw-r--r--lib/Target/Mips/MicroMips64r6InstrInfo.td28
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td33
-rw-r--r--lib/Target/Mips/Mips.td2
-rw-r--r--lib/Target/Mips/Mips16HardFloat.cpp18
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td2
-rw-r--r--lib/Target/Mips/Mips32r6InstrInfo.td6
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td264
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp173
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h16
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp119
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp58
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp106
-rw-r--r--lib/Target/Mips/MipsHazardSchedule.cpp27
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp190
-rw-r--r--lib/Target/Mips/MipsISelLowering.h54
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp32
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h3
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td243
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp44
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp13
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h34
-rw-r--r--lib/Target/Mips/MipsOptionRecord.h17
-rw-r--r--lib/Target/Mips/MipsOs16.cpp2
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td39
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp43
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h9
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp241
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp18
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp15
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp7
-rw-r--r--lib/Target/Mips/MipsSubtarget.h9
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp42
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h19
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp12
-rw-r--r--lib/Target/NVPTX/LLVMBuild.txt2
-rw-r--r--lib/Target/NVPTX/NVPTX.h5
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp42
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp2449
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h4
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp1694
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h34
-rw-r--r--lib/Target/NVPTX/NVPTXImageOptimizer.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp583
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp5
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td530
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td626
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp223
-rw-r--r--lib/Target/NVPTX/NVPTXLowerArgs.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTXMCExpr.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXMCExpr.h10
-rw-r--r--lib/Target/NVPTX/NVPTXPeephole.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp52
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td4
-rw-r--r--lib/Target/NVPTX/NVPTXSection.h2
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp9
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp15
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h3
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.h4
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp62
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp32
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp34
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h9
-rw-r--r--lib/Target/PowerPC/PPC.h2
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp98
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp88
-rw-r--r--lib/Target/PowerPC/PPCExpandISEL.cpp458
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp39
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp109
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp427
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h71
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td10
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td8
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp31
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td29
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td28
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp6
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--lib/Target/PowerPC/PPCMIPeephole.cpp20
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.cpp5
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h73
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp73
-rw-r--r--lib/Target/PowerPC/PPCScheduleP8.td2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp24
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h6
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp43
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h1
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h14
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp14
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.h8
-rw-r--r--lib/Target/PowerPC/PPCVSXCopy.cpp4
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp59
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp4
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp5
-rw-r--r--lib/Target/RISCV/RISCVInstrFormats.td3
-rw-r--r--lib/Target/RISCV/RISCVTargetMachine.cpp6
-rw-r--r--lib/Target/Sparc/AsmParser/SparcAsmParser.cpp56
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp3
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp5
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h8
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp37
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp11
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp5
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h1
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp23
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp8
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp7
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h5
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp4
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp25
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp15
-rw-r--r--lib/Target/SystemZ/SystemZElimCompare.cpp65
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp115
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h1
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp135
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h31
-rw-r--r--lib/Target/SystemZ/SystemZInstrVector.td40
-rw-r--r--lib/Target/SystemZ/SystemZLongBranch.cpp83
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.h26
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ13.td2
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp8
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp27
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h17
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp549
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h28
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp5
-rw-r--r--lib/Target/TargetMachine.cpp11
-rw-r--r--lib/Target/WebAssembly/CMakeLists.txt2
-rw-r--r--lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp85
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp14
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h2
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp107
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h31
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp34
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h9
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp47
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp24
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h54
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp166
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h43
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp92
-rw-r--r--lib/Target/WebAssembly/README.txt21
-rw-r--r--lib/Target/WebAssembly/WebAssembly.h1
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp107
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.h77
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGSort.cpp277
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp237
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp25
-rw-r--r--lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp66
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFastISel.cpp30
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp98
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.h2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrCall.td12
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrControl.td5
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrFloat.td4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp6
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.td18
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrMemory.td4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp35
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp130
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.h6
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyPeephole.cpp44
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegStackify.cpp34
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp1302
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h37
-rw-r--r--lib/Target/WebAssembly/WebAssemblyStoreResults.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp26
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp10
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyUtilities.cpp26
-rw-r--r--lib/Target/WebAssembly/WebAssemblyUtilities.h9
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp67
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.h12
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp134
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h24
-rw-r--r--lib/Target/X86/CMakeLists.txt10
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp159
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp37
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h81
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp25
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h7
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp2
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp10
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp27
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp40
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp27
-rw-r--r--lib/Target/X86/X86.h6
-rw-r--r--lib/Target/X86/X86.td52
-rw-r--r--lib/Target/X86/X86AsmPrinter.h4
-rw-r--r--lib/Target/X86/X86CallFrameOptimization.cpp75
-rw-r--r--lib/Target/X86/X86CallLowering.cpp183
-rw-r--r--lib/Target/X86/X86CallLowering.h8
-rw-r--r--lib/Target/X86/X86CallingConv.td2
-rwxr-xr-xlib/Target/X86/X86EvexToVex.cpp40
-rw-r--r--lib/Target/X86/X86ExpandPseudo.cpp26
-rw-r--r--lib/Target/X86/X86FastISel.cpp109
-rw-r--r--lib/Target/X86/X86FixupBWInsts.cpp53
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp18
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp117
-rw-r--r--lib/Target/X86/X86FrameLowering.h3
-rw-r--r--lib/Target/X86/X86GenRegisterBankInfo.def104
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp117
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp4020
-rw-r--r--lib/Target/X86/X86ISelLowering.h61
-rw-r--r--lib/Target/X86/X86Instr3DNow.td22
-rw-r--r--lib/Target/X86/X86InstrAVX512.td1290
-rw-r--r--lib/Target/X86/X86InstrBuilder.h2
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td6
-rw-r--r--lib/Target/X86/X86InstrCompiler.td20
-rw-r--r--lib/Target/X86/X86InstrControl.td31
-rw-r--r--lib/Target/X86/X86InstrFMA.td16
-rw-r--r--lib/Target/X86/X86InstrFMA3Info.cpp5
-rw-r--r--lib/Target/X86/X86InstrFMA3Info.h11
-rw-r--r--lib/Target/X86/X86InstrFPStack.td16
-rw-r--r--lib/Target/X86/X86InstrFormats.td10
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td105
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp1565
-rw-r--r--lib/Target/X86/X86InstrInfo.h46
-rw-r--r--lib/Target/X86/X86InstrInfo.td64
-rw-r--r--lib/Target/X86/X86InstrMMX.td39
-rw-r--r--lib/Target/X86/X86InstrMPX.td10
-rw-r--r--lib/Target/X86/X86InstrSSE.td1266
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td82
-rw-r--r--lib/Target/X86/X86InstrSystem.td25
-rw-r--r--lib/Target/X86/X86InstrTSX.td10
-rwxr-xr-xlib/Target/X86/X86InstrTablesInfo.h1162
-rw-r--r--lib/Target/X86/X86InstrVMX.td20
-rw-r--r--lib/Target/X86/X86InstrXOP.td195
-rw-r--r--lib/Target/X86/X86InstructionSelector.cpp516
-rw-r--r--lib/Target/X86/X86InterleavedAccess.cpp3
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h147
-rw-r--r--lib/Target/X86/X86LegalizerInfo.cpp142
-rw-r--r--lib/Target/X86/X86LegalizerInfo.h43
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp96
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.cpp8
-rw-r--r--lib/Target/X86/X86MacroFusion.cpp271
-rw-r--r--lib/Target/X86/X86MacroFusion.h30
-rw-r--r--lib/Target/X86/X86OptimizeLEAs.cpp18
-rw-r--r--lib/Target/X86/X86RegisterBankInfo.cpp243
-rw-r--r--lib/Target/X86/X86RegisterBankInfo.h81
-rw-r--r--lib/Target/X86/X86RegisterBanks.td17
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp10
-rw-r--r--lib/Target/X86/X86RegisterInfo.td36
-rw-r--r--lib/Target/X86/X86Schedule.td1
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp10
-rw-r--r--lib/Target/X86/X86ShuffleDecodeConstantPool.cpp78
-rw-r--r--lib/Target/X86/X86Subtarget.cpp32
-rw-r--r--lib/Target/X86/X86Subtarget.h71
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp121
-rw-r--r--lib/Target/X86/X86TargetMachine.h16
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp106
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h19
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp118
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp6
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.h6
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp16
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h8
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp2
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp3
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h1
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h44
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.cpp10
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp18
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h9
710 files changed, 103253 insertions, 38620 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index fd106a8d9b0b..b44b13e36e15 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -22,8 +22,11 @@
namespace llvm {
+class AArch64RegisterBankInfo;
+class AArch64Subtarget;
class AArch64TargetMachine;
class FunctionPass;
+class InstructionSelector;
class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
@@ -45,6 +48,9 @@ FunctionPass *createAArch64A53Fix835769();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
FunctionPass *createAArch64CollectLOHPass();
+InstructionSelector *
+createAArch64InstructionSelector(const AArch64TargetMachine &,
+ AArch64Subtarget &, AArch64RegisterBankInfo &);
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 91c335fac32d..519ca2894683 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -27,7 +27,7 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
- "Enable cryptographic instructions">;
+ "Enable cryptographic instructions", [FeatureNEON]>;
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
@@ -38,6 +38,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
+ "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
@@ -100,6 +103,14 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
+def FeatureFuseAES : SubtargetFeature<
+ "fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
+
+def FeatureFuseLiterals : SubtargetFeature<
+ "fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
@@ -108,12 +119,22 @@ def FeatureUseRSqrt : SubtargetFeature<
"use-reciprocal-square-root", "UseRSqrt", "true",
"Use the reciprocal square root approximation">;
+def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
+
+def FeatureLSLFast : SubtargetFeature<
+ "lsl-fast", "HasLSLFast", "true",
+ "CPU has a fastpath logical shift of up to 3 places">;
//===----------------------------------------------------------------------===//
// Architectures.
//
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE]>;
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
@@ -123,6 +144,7 @@ def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
//===----------------------------------------------------------------------===//
include "AArch64RegisterInfo.td"
+include "AArch64RegisterBanks.td"
include "AArch64CallingConvention.td"
//===----------------------------------------------------------------------===//
@@ -149,7 +171,8 @@ include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
include "AArch64SchedM1.td"
-include "AArch64SchedVulcan.td"
+include "AArch64SchedThunderX.td"
+include "AArch64SchedThunderX2T99.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@@ -180,6 +203,8 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -226,6 +251,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -256,7 +282,8 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureZCZeroing
+ FeatureZCZeroing,
+ FeatureLSLFast
]>;
def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
@@ -269,19 +296,66 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureZCZeroing
+ FeatureRDM,
+ FeatureZCZeroing,
+ FeatureLSLFast
]>;
-def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
- "Broadcom Vulcan processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureArithmeticBccFusion,
- FeatureNEON,
- FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- HasV8_1aOps]>;
+def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
+ "ThunderX2T99",
+ "Cavium ThunderX2 processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureArithmeticBccFusion,
+ FeatureNEON,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureLSE,
+ HasV8_1aOps]>;
+
+def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
+ "ThunderXT88",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
+ "ThunderXT81",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
+ "ThunderXT83",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
def : ProcessorModel<"generic", NoSchedModel, [
FeatureCRC,
@@ -291,11 +365,11 @@ def : ProcessorModel<"generic", NoSchedModel, [
FeaturePostRAScheduler
]>;
-// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
+// FIXME: Cortex-A35 is currently modeled as a Cortex-A53.
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-// FIXME: Cortex-A72 and Cortex-A73 are currently modelled as an Cortex-A57.
+// FIXME: Cortex-A72 and Cortex-A73 are currently modeled as a Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
@@ -304,7 +378,13 @@ def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
-def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>;
+// Cavium ThunderX/ThunderX T8X Processors
+def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
+def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
+def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
+def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
+// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
+def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 0aa597bcdc56..4a7e0b2b803e 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -493,43 +493,30 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
MachineBasicBlock &MBB) {
- RegScavenger RS;
- RS.enterBasicBlock(MBB);
- RS.forward(MachineBasicBlock::iterator(G->getStart()));
-
// Can we find an appropriate register that is available throughout the life
- // of the chain?
- unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
- BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
- for (MachineBasicBlock::iterator I = G->begin(), E = G->end(); I != E; ++I) {
- RS.forward(I);
- AvailableRegs &= RS.getRegsAvailable(TRI->getRegClass(RegClassID));
-
- // Remove any registers clobbered by a regmask or any def register that is
- // immediately dead.
- for (auto J : I->operands()) {
- if (J.isRegMask())
- AvailableRegs.clearBitsNotInMask(J.getRegMask());
-
- if (J.isReg() && J.isDef()) {
- MCRegAliasIterator AI(J.getReg(), TRI, /*IncludeSelf=*/true);
- if (J.isDead())
- for (; AI.isValid(); ++AI)
- AvailableRegs.reset(*AI);
-#ifndef NDEBUG
- else
- for (; AI.isValid(); ++AI)
- assert(!AvailableRegs[*AI] &&
- "Non-dead def should have been removed by now!");
-#endif
- }
- }
+ // of the chain? Simulate liveness backwards until the end of the chain.
+ LiveRegUnits Units(*TRI);
+ Units.addLiveOuts(MBB);
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator ChainEnd = G->end();
+ while (I != ChainEnd) {
+ --I;
+ Units.stepBackward(*I);
}
+ // Check which register units are alive throughout the chain.
+ MachineBasicBlock::iterator ChainBegin = G->begin();
+ assert(ChainBegin != ChainEnd && "Chain should contain instructions");
+ do {
+ --I;
+ Units.accumulateBackward(*I);
+ } while (I != ChainBegin);
+
// Make sure we allocate in-order, to get the cheapest registers first.
+ unsigned RegClassID = ChainBegin->getDesc().OpInfo[0].RegClass;
auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
for (auto Reg : Ord) {
- if (!AvailableRegs[Reg])
+ if (!Units.available(Reg))
continue;
if (C == getColor(Reg))
return Reg;
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 0cbb2db1134a..e1b8ee6d03c3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -31,16 +31,23 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
@@ -59,12 +66,12 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
//===----------------------------------------------------------------------===//
namespace {
-class AArch64AddressTypePromotion : public FunctionPass {
+class AArch64AddressTypePromotion : public FunctionPass {
public:
static char ID;
- AArch64AddressTypePromotion()
- : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) {
+
+ AArch64AddressTypePromotion() : FunctionPass(ID) {
initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
}
@@ -76,10 +83,11 @@ public:
private:
/// The current function.
- Function *Func;
+ Function *Func = nullptr;
+
/// Filter out all sexts that does not have this type.
/// Currently initialized with Int64Ty.
- Type *ConsideredSExtType;
+ Type *ConsideredSExtType = nullptr;
// This transformation requires dominator info.
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -129,7 +137,8 @@ private:
void mergeSExts(ValueToInsts &ValToSExtendedUses,
SetOfInstructions &ToRemove);
};
-} // end anonymous namespace.
+
+} // end anonymous namespace
char AArch64AddressTypePromotion::ID = 0;
diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp
index a4950af32097..b2f55a7e1e09 100644
--- a/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/lib/Target/AArch64/AArch64CallLowering.cpp - Call lowering ---===//
+//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,15 +15,36 @@
#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
-
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -31,12 +52,12 @@ using namespace llvm;
#endif
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
- : CallLowering(&TLI) {
-}
+ : CallLowering(&TLI) {}
struct IncomingArgHandler : public CallLowering::ValueHandler {
- IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : ValueHandler(MIRBuilder, MRI) {}
+ IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -45,6 +66,7 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
MIRBuilder.buildFrameIndex(AddrReg, FI);
+ StackUsed = std::max(StackUsed, Size + Offset);
return AddrReg;
}
@@ -67,11 +89,14 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+ uint64_t StackUsed;
};
struct FormalArgHandler : public IncomingArgHandler {
- FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : IncomingArgHandler(MIRBuilder, MRI) {}
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMBB().addLiveIn(PhysReg);
@@ -80,8 +105,8 @@ struct FormalArgHandler : public IncomingArgHandler {
struct CallReturnHandler : public IncomingArgHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB)
- : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
@@ -92,8 +117,10 @@ struct CallReturnHandler : public IncomingArgHandler {
struct OutgoingArgHandler : public CallLowering::ValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB)
- : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn,
+ CCAssignFn *AssignFnVarArg)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
+ AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -126,14 +153,29 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
+ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info,
+ CCState &State) override {
+ bool Res;
+ if (Info.IsFixed)
+ Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ else
+ Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+
+ StackSize = State.getNextStackOffset();
+ return Res;
+ }
+
MachineInstrBuilder MIB;
+ CCAssignFn *AssignFnVarArg;
+ uint64_t StackSize;
};
-void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
- SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL,
- MachineRegisterInfo &MRI,
- SplitArgTy PerformArgSplit) const {
+void AArch64CallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ const SplitArgTy &PerformArgSplit) const {
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -145,7 +187,7 @@ void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags);
+ OrigArg.Flags, OrigArg.IsFixed);
return;
}
@@ -154,19 +196,12 @@ void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// FIXME: set split flags if they're actually used (e.g. i128 on AAPCS).
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
SplitArgs.push_back(
- ArgInfo{MRI.createGenericVirtualRegister(LLT{*SplitTy, DL}), SplitTy,
- OrigArg.Flags});
+ ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
+ SplitTy, OrigArg.Flags, OrigArg.IsFixed});
}
- SmallVector<uint64_t, 4> BitOffsets;
- for (auto Offset : Offsets)
- BitOffsets.push_back(Offset * 8);
-
- SmallVector<unsigned, 8> SplitRegs;
- for (auto I = &SplitArgs[FirstRegIdx]; I != SplitArgs.end(); ++I)
- SplitRegs.push_back(I->Reg);
-
- PerformArgSplit(SplitRegs, BitOffsets);
+ for (unsigned i = 0; i < Offsets.size(); ++i)
+ PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
}
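A small worked example of the new per-part callback, with an assumed argument type:

  // Splitting an argument of IR type [2 x i64] produces two i64 parts whose
  // byte offsets from ComputeValueVTs are 0 and 8, so the callback now runs
  // once per part:
  //   PerformArgSplit(NewVReg0, 0);    // bits [0, 64)
  //   PerformArgSplit(NewVReg1, 64);   // bits [64, 128)
  // instead of a single call with {NewVReg0, NewVReg1} and {0, 64}.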
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -184,16 +219,16 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
auto &DL = F.getParent()->getDataLayout();
ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeSet::ReturnIndex, DL, F);
+ setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 8> SplitArgs;
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildExtract(Regs, Offsets, VReg);
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, VReg, Offset);
});
- OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
- Success = handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler);
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
+ Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
}
MIRBuilder.insertInstr(MIB);
@@ -203,7 +238,6 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
- auto &Args = F.getArgumentList();
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -211,13 +245,27 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
unsigned i = 0;
- for (auto &Arg : Args) {
+ for (auto &Arg : F.args()) {
ArgInfo OrigArg{VRegs[i], Arg.getType()};
setArgFlags(OrigArg, i + 1, DL, F);
+ bool Split = false;
+ LLT Ty = MRI.getType(VRegs[i]);
+ unsigned Dst = VRegs[i];
+
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildSequence(VRegs[i], Regs, Offsets);
+ [&](unsigned Reg, uint64_t Offset) {
+ if (!Split) {
+ Split = true;
+ Dst = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Dst);
+ }
+ unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
+ Dst = Tmp;
});
+
+ if (Dst != VRegs[i])
+ MIRBuilder.buildCopy(VRegs[i], Dst);
++i;
}
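Roughly the generic MIR this loop is expected to build for a split argument (the register names and the s128 type are illustrative):

  //   %undef(s128) = G_IMPLICIT_DEF
  //   %t0(s128)    = G_INSERT %undef, %part0(s64), 0
  //   %t1(s128)    = G_INSERT %t0,    %part1(s64), 64
  //   %arg         = COPY %t1        // copied back into VRegs[i]
  // Arguments that were not split leave Dst == VRegs[i] and skip the copy.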
@@ -228,10 +276,25 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
- FormalArgHandler Handler(MIRBuilder, MRI);
- if (!handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler))
+ FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
+ if (F.isVarArg()) {
+ if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
+ // FIXME: we need to reimplement saveVarArgsRegisters from
+ // AArch64ISelLowering.
+ return false;
+ }
+
+ // We currently pass all varargs at 8-byte alignment.
+ uint64_t StackOffset = alignTo(Handler.StackUsed, 8);
+
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+ }
+
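A hedged example of what this var-args bookkeeping computes:

  // For a Darwin callee such as  int f(int n, ...)  every fixed argument fits
  // in registers, so Handler.StackUsed == 0 and the variadic area starts at
  // alignTo(0, 8) == 0. Had the fixed arguments already consumed 12 bytes of
  // stack, the fixed object would instead be created at alignTo(12, 8) == 16.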
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
@@ -239,6 +302,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
}
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv,
const MachineOperand &Callee,
const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const {
@@ -250,21 +314,25 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
for (auto &OrigArg : OrigArgs) {
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildExtract(Regs, Offsets, OrigArg.Reg);
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
});
}
// Find out which ABI gets to decide where things go.
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- CCAssignFn *CallAssignFn =
- TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+ CCAssignFn *AssignFnFixed =
+ TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ CCAssignFn *AssignFnVarArg =
+ TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true);
+
+ auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
: AArch64::BL);
- MIB.addOperand(Callee);
+ MIB.add(Callee);
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget().getRegisterInfo();
@@ -272,8 +340,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
- OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
- if (!handleAssignments(MIRBuilder, CallAssignFn, SplitArgs, Handler))
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
+ AssignFnVarArg);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
// Now we can add the actual call instruction to the correct basic block.
@@ -298,20 +367,23 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<uint64_t, 8> RegOffsets;
SmallVector<unsigned, 8> SplitRegs;
splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- std::copy(Offsets.begin(), Offsets.end(),
- std::back_inserter(RegOffsets));
- std::copy(Regs.begin(), Regs.end(),
- std::back_inserter(SplitRegs));
+ [&](unsigned Reg, uint64_t Offset) {
+ RegOffsets.push_back(Offset);
+ SplitRegs.push_back(Reg);
});
- CallReturnHandler Handler(MIRBuilder, MRI, MIB);
- if (!handleAssignments(MIRBuilder, RetAssignFn, SplitArgs, Handler))
+ CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
if (!RegOffsets.empty())
MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
}
+ CallSeqStart.addImm(Handler.StackSize);
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+ .addImm(Handler.StackSize)
+ .addImm(0);
+
return true;
}
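The call is now bracketed by stack-adjustment pseudos; the resulting MIR is expected to look roughly like this (operands elided):

  //   ADJCALLSTACKDOWN <StackSize>
  //   ... argument copies into physical registers / stores to the stack ...
  //   BL @callee, <regmask>, ...
  //   ADJCALLSTACKUP <StackSize>, 0
  // so frame lowering can later see how many outgoing stack bytes the call used.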
diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h
index ce6676249df6..d96ce95c4de0 100644
--- a/lib/Target/AArch64/AArch64CallLowering.h
+++ b/lib/Target/AArch64/AArch64CallLowering.h
@@ -1,4 +1,4 @@
-//===-- llvm/lib/Target/AArch64/AArch64CallLowering.h - Call lowering -----===//
+//===--- AArch64CallLowering.h - Call lowering ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,20 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING
-#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include <cstdint>
+#include <functional>
namespace llvm {
class AArch64TargetLowering;
class AArch64CallLowering: public CallLowering {
- public:
+public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
@@ -32,8 +34,8 @@ class AArch64CallLowering: public CallLowering {
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, const MachineOperand &Callee,
- const ArgInfo &OrigRet,
+ bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const override;
private:
@@ -44,13 +46,14 @@ private:
typedef std::function<void(MachineIRBuilder &, int, CCValAssign &)>
MemHandler;
- typedef std::function<void(ArrayRef<unsigned>, ArrayRef<uint64_t>)>
- SplitArgTy;
+ typedef std::function<void(unsigned, uint64_t)> SplitArgTy;
void splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
- SplitArgTy SplitArg) const;
+ const SplitArgTy &SplitArg) const;
};
-} // End of namespace llvm;
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 8b186328d125..2dfcd2d1c393 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -265,10 +265,10 @@ void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
// Change immediate in comparison instruction (ADDS or SUBS).
BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
- .addOperand(CmpMI->getOperand(0))
- .addOperand(CmpMI->getOperand(1))
+ .add(CmpMI->getOperand(0))
+ .add(CmpMI->getOperand(1))
.addImm(Imm)
- .addOperand(CmpMI->getOperand(3));
+ .add(CmpMI->getOperand(3));
CmpMI->eraseFromParent();
// The fact that this comparison was picked ensures that it's related to the
@@ -278,7 +278,7 @@ void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
// Change condition in branch instruction.
BuildMI(*MBB, BrMI, BrMI.getDebugLoc(), TII->get(AArch64::Bcc))
.addImm(Cmp)
- .addOperand(BrMI.getOperand(1));
+ .add(BrMI.getOperand(1));
BrMI.eraseFromParent();
MBB->updateTerminator();
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index da09b36cac9c..00a0111f2bd2 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -594,7 +594,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
BuildMI(*Head, Head->end(), TermDL, MCID)
.addReg(DestReg, RegState::Define | RegState::Dead)
- .addOperand(HeadCond[2])
+ .add(HeadCond[2])
.addImm(0)
.addImm(0);
// SUBS uses the GPR*sp register classes.
@@ -650,13 +650,12 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
if (CmpMI->getOperand(FirstOp + 1).isReg())
MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
TII->getRegClass(MCID, 1, TRI, *MF));
- MachineInstrBuilder MIB =
- BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
- .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
+ MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
+ .add(CmpMI->getOperand(FirstOp)); // Register Rn
if (isZBranch)
MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
else
- MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
+ MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
MIB.addImm(NZCV).addImm(HeadCmpBBCC);
// If CmpMI was a terminator, we need a new conditional branch to replace it.
@@ -666,7 +665,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
CmpMI->getOpcode() == AArch64::CBNZX;
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
.addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
- .addOperand(CmpMI->getOperand(1)); // Branch target.
+ .add(CmpMI->getOperand(1)); // Branch target.
}
CmpMI->eraseFromParent();
Head->updateTerminator();
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index fe1c0beee0eb..d0c0956b87ca 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -70,9 +71,9 @@ static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
- UseMI.addOperand(MO);
+ UseMI.add(MO);
else
- DefMI.addOperand(MO);
+ DefMI.add(MO);
}
}
@@ -112,7 +113,7 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -179,7 +180,7 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -362,7 +363,7 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -425,7 +426,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
.addImm(Encoding);
transferImpOps(MI, MIB, MIB);
@@ -539,15 +540,15 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
if (Imm != 0) {
unsigned LZ = countLeadingZeros(Imm);
unsigned TZ = countTrailingZeros(Imm);
- Shift = ((63 - LZ) / 16) * 16;
- LastShift = (TZ / 16) * 16;
+ Shift = (TZ / 16) * 16;
+ LastShift = ((63 - LZ) / 16) * 16;
}
unsigned Imm16 = (Imm >> Shift) & Mask;
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
.addReg(DstReg, RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
+ getDeadRegState(DstIsDead && Shift == LastShift))
.addImm(Imm16)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
@@ -564,15 +565,15 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MachineInstrBuilder MIB2;
unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
- while (Shift != LastShift) {
- Shift -= 16;
+ while (Shift < LastShift) {
+ Shift += 16;
Imm16 = (Imm >> Shift) & Mask;
if (Imm16 == (isNeg ? Mask : 0))
continue; // This 16-bit portion is already set correctly.
MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addReg(DstReg,
RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
+ getDeadRegState(DstIsDead && Shift == LastShift))
.addReg(DstReg)
.addImm(Imm16)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
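This hunk flips the materialization order: MOVZ now writes the lowest non-zero 16-bit chunk and MOVK fills in the higher ones. A standalone sketch of that chunk selection, simplified to the positive (MOVZ) path only:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Imm = 0x0000123400005678ULL;       // example immediate
    unsigned TZ = __builtin_ctzll(Imm);
    unsigned LZ = __builtin_clzll(Imm);
    unsigned Shift = (TZ / 16) * 16;            // lowest non-zero 16-bit chunk
    unsigned LastShift = ((63 - LZ) / 16) * 16; // highest non-zero 16-bit chunk
    printf("MOVZ #0x%04x, LSL #%u\n", unsigned((Imm >> Shift) & 0xFFFF), Shift);
    for (unsigned S = Shift + 16; S <= LastShift; S += 16) {
      unsigned Chunk = (Imm >> S) & 0xFFFF;
      if (Chunk == 0)
        continue;                               // all-zero chunks need no MOVK
      printf("MOVK #0x%04x, LSL #%u\n", Chunk, S);
    }
    // Prints: MOVZ #0x5678, LSL #0  then  MOVK #0x1234, LSL #32
  }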
@@ -627,7 +628,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
.addReg(Addr.getReg());
BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .addOperand(Desired)
+ .add(Desired)
.addImm(ExtendImm);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE)
@@ -643,9 +644,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
StoreBB->addLiveIn(New.getReg());
addPostLoopLiveIns(StoreBB, LiveRegs);
- BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
- .addOperand(New)
- .addOperand(Addr);
+ BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill)
.addMBB(LoadCmpBB);
@@ -710,7 +709,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addReg(Addr.getReg());
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
- .addOperand(DesiredLo)
+ .add(DesiredLo)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(AArch64::WZR)
@@ -718,7 +717,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
- .addOperand(DesiredHi)
+ .add(DesiredHi)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(StatusReg, RegState::Kill)
@@ -738,9 +737,9 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
StoreBB->addLiveIn(NewHi.getReg());
addPostLoopLiveIns(StoreBB, LiveRegs);
BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
- .addOperand(NewLo)
- .addOperand(NewHi)
- .addOperand(Addr);
+ .add(NewLo)
+ .add(NewHi)
+ .add(Addr);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill)
.addMBB(LoadCmpBB);
@@ -825,8 +824,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
@@ -842,7 +841,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(DstReg);
if (MO1.isGlobal()) {
@@ -878,19 +877,31 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
- .addOperand(MI.getOperand(1));
+ .add(MI.getOperand(1));
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(DstReg)
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(2))
.addImm(0);
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
+ case AArch64::MOVbaseTLS: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ auto SysReg = AArch64SysReg::TPIDR_EL0;
+ MachineFunction *MF = MBB.getParent();
+ if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
+ MF->getTarget().getCodeModel() == CodeModel::Kernel)
+ SysReg = AArch64SysReg::TPIDR_EL1;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
+ .addImm(SysReg);
+ MI.eraseFromParent();
+ return true;
+ }
case AArch64::MOVi32imm:
return expandMOVImm(MBB, MBBI, 32);
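For the new MOVbaseTLS pseudo the expansion is a single system-register read; sketched, the rewrite is:

  //   %x0 = MOVbaseTLS   ->   %x0 = MRS TPIDR_EL0
  // and, only when targeting Fuchsia with the kernel code model,
  //   %x0 = MOVbaseTLS   ->   %x0 = MRS TPIDR_EL1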
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index fe2c2d4550a7..4e5e3e43a468 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -15,28 +15,62 @@
#include "AArch64.h"
#include "AArch64CallingConvention.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -50,48 +84,55 @@ class AArch64FastISel final : public FastISel {
} BaseKind;
private:
- BaseKind Kind;
- AArch64_AM::ShiftExtendType ExtType;
+ BaseKind Kind = RegBase;
+ AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
union {
unsigned Reg;
int FI;
} Base;
- unsigned OffsetReg;
- unsigned Shift;
- int64_t Offset;
- const GlobalValue *GV;
+ unsigned OffsetReg = 0;
+ unsigned Shift = 0;
+ int64_t Offset = 0;
+ const GlobalValue *GV = nullptr;
public:
- Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
- OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
+ Address() { Base.Reg = 0; }
+
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
+
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
Base.Reg = Reg;
}
+
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+
void setOffsetReg(unsigned Reg) {
OffsetReg = Reg;
}
+
unsigned getOffsetReg() const {
return OffsetReg;
}
+
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
}
+
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
}
+
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
void setShift(unsigned S) { Shift = S; }
@@ -417,7 +458,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
- if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
+ if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
return 0;
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
@@ -531,23 +572,23 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
switch (Opcode) {
default:
break;
- case Instruction::BitCast: {
+ case Instruction::BitCast:
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr, Ty);
- }
- case Instruction::IntToPtr: {
+
+ case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
- }
- case Instruction::PtrToInt: {
+
+ case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
- }
+
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
@@ -563,7 +604,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -2813,8 +2854,8 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
- assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
- "Unexpected value type.");
+ assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
+ "Unexpected value type.");
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
@@ -3106,8 +3147,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
return false;
CodeModel::Model CM = TM.getCodeModel();
- // Only support the small and large code model.
- if (CM != CodeModel::Small && CM != CodeModel::Large)
+ // Only support the small-addressing and large code models.
+ if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
return false;
// FIXME: Add large code model support for ELF.
@@ -3158,7 +3199,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Issue the call.
MachineInstrBuilder MIB;
- if (CM == CodeModel::Small) {
+ if (Subtarget->useSmallAddressing()) {
const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
if (Symbol)
@@ -3369,8 +3410,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
MFI.setFrameAddressIsTaken(true);
- const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
+ const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3521,11 +3561,11 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
- case Intrinsic::trap: {
+ case Intrinsic::trap:
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
- }
+
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
@@ -5092,8 +5132,10 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
}
namespace llvm {
-llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
+
+FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
return new AArch64FastISel(FuncInfo, LibInfo);
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index f5b8c35375f8..550174b22a89 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -90,21 +90,42 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
using namespace llvm;
@@ -245,14 +266,13 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
if (&MF->front() == MBB)
return AArch64::X9;
- const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
- LivePhysRegs LiveRegs(&TRI);
+ const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ LivePhysRegs LiveRegs(TRI);
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF);
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -319,7 +339,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
-
unsigned NewOpc;
bool NewIsUnscaled = false;
switch (MBBI->getOpcode()) {
@@ -362,7 +381,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
unsigned OpndIdx = 0;
for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
++OpndIdx)
- MIB.addOperand(MBBI->getOperand(OpndIdx));
+ MIB.add(MBBI->getOperand(OpndIdx));
assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
"Unexpected immediate offset in first/last callee-save save/restore "
@@ -863,22 +882,26 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
static bool produceCompactUnwindFrame(MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- AttributeSet Attrs = MF.getFunction()->getAttributes();
+ AttributeList Attrs = MF.getFunction()->getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
namespace {
+
struct RegPairInfo {
- RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
- unsigned Reg1;
- unsigned Reg2;
+ unsigned Reg1 = AArch64::NoRegister;
+ unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
bool IsGPR;
+
+ RegPairInfo() = default;
+
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};
+
} // end anonymous namespace
static void computeCalleeSaveRegisterPairs(
diff --git a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index d472a54d9543..8b1c9740d2ad 100644
--- a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -16,281 +16,198 @@
#endif
namespace llvm {
-namespace AArch64 {
-
-const uint32_t GPRCoverageData[] = {
- // Classes 0-31
- (1u << AArch64::GPR32allRegClassID) | (1u << AArch64::GPR32RegClassID) |
- (1u << AArch64::GPR32spRegClassID) |
- (1u << AArch64::GPR32commonRegClassID) |
- (1u << AArch64::GPR32sponlyRegClassID) |
- (1u << AArch64::GPR64allRegClassID) | (1u << AArch64::GPR64RegClassID) |
- (1u << AArch64::GPR64spRegClassID) |
- (1u << AArch64::GPR64commonRegClassID) |
- (1u << AArch64::tcGPR64RegClassID) |
- (1u << AArch64::GPR64sponlyRegClassID),
- // Classes 32-63
- 0,
- // FIXME: The entries below this point can be safely removed once this is
- // tablegenerated. It's only needed because of the hardcoded register class
- // limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-const uint32_t FPRCoverageData[] = {
- // Classes 0-31
- (1u << AArch64::FPR8RegClassID) | (1u << AArch64::FPR16RegClassID) |
- (1u << AArch64::FPR32RegClassID) | (1u << AArch64::FPR64RegClassID) |
- (1u << AArch64::DDRegClassID) | (1u << AArch64::FPR128RegClassID) |
- (1u << AArch64::FPR128_loRegClassID) | (1u << AArch64::DDDRegClassID) |
- (1u << AArch64::DDDDRegClassID),
- // Classes 32-63
- (1u << (AArch64::QQRegClassID - 32)) |
- (1u << (AArch64::QQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub1_in_FPR128_lo_and_QQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub0_in_FPR128_lo_and_QQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u << (AArch64::QQQQRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub2_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub3_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub1_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub1_in_FPR128_lo_and_QQQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub2_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub1_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQ_with_qsub0_in_FPR128_lo_and_QQ_with_qsub1_in_FPR128_loRegClassID -
- 32)) |
- (1u << (AArch64::QQQRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub2_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub0_in_FPR128_lo_and_QQQ_with_qsub1_in_FPR128_loRegClassID -
- 32)),
- // FIXME: The entries below this point can be safely removed once this
- // is tablegenerated. It's only needed because of the hardcoded register
- // class limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-const uint32_t CCRCoverageData[] = {
- // Classes 0-31
- 1u << AArch64::CCRRegClassID,
- // Classes 32-63
- 0,
- // FIXME: The entries below this point can be safely removed once this
- // is tablegenerated. It's only needed because of the hardcoded register
- // class limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-RegisterBank GPRRegBank(AArch64::GPRRegBankID, "GPR", 64, GPRCoverageData);
-RegisterBank FPRRegBank(AArch64::FPRRegBankID, "FPR", 512, FPRCoverageData);
-RegisterBank CCRRegBank(AArch64::CCRRegBankID, "CCR", 32, CCRCoverageData);
-
-RegisterBank *RegBanks[] = {&GPRRegBank, &FPRRegBank, &CCRRegBank};
-
-// PartialMappings.
-enum PartialMappingIdx {
- PMI_None = -1,
- PMI_GPR32 = 1,
- PMI_GPR64,
- PMI_FPR32,
- PMI_FPR64,
- PMI_FPR128,
- PMI_FPR256,
- PMI_FPR512,
- PMI_FirstGPR = PMI_GPR32,
- PMI_LastGPR = PMI_GPR64,
- PMI_FirstFPR = PMI_FPR32,
- PMI_LastFPR = PMI_FPR512,
- PMI_Min = PMI_FirstGPR,
-};
-
-static unsigned getRegBankBaseIdxOffset(unsigned Size) {
- assert(Size && "0-sized type!!");
- // Make anything smaller than 32 gets 32
- Size = ((Size + 31) / 32) * 32;
- // 32 is 0, 64 is 1, 128 is 2, and so on.
- return Log2_32(Size) - /*Log2_32(32)=*/ 5;
-}
-
-RegisterBankInfo::PartialMapping PartMappings[] {
- /* StartIdx, Length, RegBank */
- // 0: GPR 32-bit value.
- {0, 32, GPRRegBank},
- // 1: GPR 64-bit value.
- {0, 64, GPRRegBank},
- // 2: FPR 32-bit value.
- {0, 32, FPRRegBank},
- // 3: FPR 64-bit value.
- {0, 64, FPRRegBank},
- // 4: FPR 128-bit value.
- {0, 128, FPRRegBank},
- // 5: FPR 256-bit value.
- {0, 256, FPRRegBank},
- // 6: FPR 512-bit value.
- {0, 512, FPRRegBank}
-};
-
-enum ValueMappingIdx {
- First3OpsIdx = 0,
- Last3OpsIdx = 18,
- DistanceBetweenRegBanks = 3,
- FirstCrossRegCpyIdx = 21,
- LastCrossRegCpyIdx = 27,
- DistanceBetweenCrossRegCpy = 2
+RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
+ /* StartIdx, Length, RegBank */
+ // 0: FPR 32-bit value.
+ {0, 32, AArch64::FPRRegBank},
+ // 1: FPR 64-bit value.
+ {0, 64, AArch64::FPRRegBank},
+ // 2: FPR 128-bit value.
+ {0, 128, AArch64::FPRRegBank},
+ // 3: FPR 256-bit value.
+ {0, 256, AArch64::FPRRegBank},
+ // 4: FPR 512-bit value.
+ {0, 512, AArch64::FPRRegBank},
+ // 5: GPR 32-bit value.
+ {0, 32, AArch64::GPRRegBank},
+ // 6: GPR 64-bit value.
+ {0, 64, AArch64::GPRRegBank},
};
// ValueMappings.
-RegisterBankInfo::ValueMapping ValMappings[]{
+RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
/* BreakDown, NumBreakDowns */
+ // 0: invalid
+ {nullptr, 0},
// 3-operands instructions (all binary operations should end up with one of
// those mapping).
- // 0: GPR 32-bit value. <-- This must match First3OpsIdx.
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 3: GPR 64-bit value.
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- // 6: FPR 32-bit value.
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 9: FPR 64-bit value.
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 12: FPR 128-bit value.
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- // 15: FPR 256-bit value.
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- // 18: FPR 512-bit value. <-- This must match Last3OpsIdx.
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
+ // 1: FPR 32-bit value. <-- This must match First3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 4: FPR 64-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 7: FPR 128-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ // 10: FPR 256-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ // 13: FPR 512-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ // 16: GPR 32-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 19: GPR 64-bit value. <-- This must match Last3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
// Cross register bank copies.
- // 21: GPR 32-bit value to FPR 32-bit value. <-- This must match
+ // 22: FPR 32-bit value to GPR 32-bit value. <-- This must match
// FirstCrossRegCpyIdx.
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 23: GPR 64-bit value to FPR 64-bit value.
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 25: FPR 32-bit value to GPR 32-bit value.
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 27: FPR 64-bit value to GPR 64-bit value. <-- This must match
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 24: FPR 64-bit value to GPR 64-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ // 26: FPR 128-bit value to GPR 128-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 28: FPR 256-bit value to GPR 256-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 30: FPR 512-bit value to GPR 512-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 32: GPR 32-bit value to FPR 32-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 34: GPR 64-bit value to FPR 64-bit value. <-- This must match
// LastCrossRegCpyIdx.
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1}
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
};
-/// Get the pointer to the ValueMapping representing the RegisterBank
-/// at \p RBIdx with a size of \p Size.
-///
-/// The returned mapping works for instructions with the same kind of
-/// operands for up to 3 operands.
-///
-/// \pre \p RBIdx != PartialMappingIdx::None
+bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
+ unsigned ValStartIdx,
+ unsigned ValLength,
+ const RegisterBank &RB) {
+ const PartialMapping &Map = PartMappings[Idx - PartialMappingIdx::PMI_Min];
+ return Map.StartIdx == ValStartIdx && Map.Length == ValLength &&
+ Map.RegBank == &RB;
+}
+
+bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx,
+ unsigned FirstInBank,
+ unsigned Size,
+ unsigned Offset) {
+ unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min;
+ const ValueMapping &Map =
+ AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank,
+ Size)[Offset];

+ return Map.BreakDown == &PartMappings[PartialMapBaseIdx] &&
+ Map.NumBreakDowns == 1;
+}
+
+bool AArch64GenRegisterBankInfo::checkPartialMappingIdx(
+ PartialMappingIdx FirstAlias, PartialMappingIdx LastAlias,
+ ArrayRef<PartialMappingIdx> Order) {
+ if (Order.front() != FirstAlias)
+ return false;
+ if (Order.back() != LastAlias)
+ return false;
+ if (Order.front() > Order.back())
+ return false;
+
+ PartialMappingIdx Previous = Order.front();
+ bool First = true;
+ for (const auto &Current : Order) {
+ if (First) {
+ First = false;
+ continue;
+ }
+ if (Previous + 1 != Current)
+ return false;
+ Previous = Current;
+ }
+ return true;
+}
+
+unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
+ unsigned Size) {
+ if (RBIdx == PMI_FirstGPR) {
+ if (Size <= 32)
+ return 0;
+ if (Size <= 64)
+ return 1;
+ return -1;
+ }
+ if (RBIdx == PMI_FirstFPR) {
+ if (Size <= 32)
+ return 0;
+ if (Size <= 64)
+ return 1;
+ if (Size <= 128)
+ return 2;
+ if (Size <= 256)
+ return 3;
+ if (Size <= 512)
+ return 4;
+ return -1;
+ }
+ return -1;
+}
+
const RegisterBankInfo::ValueMapping *
-getValueMapping(PartialMappingIdx RBIdx, unsigned Size) {
+AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
+ unsigned Size) {
assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
- unsigned ValMappingIdx = First3OpsIdx +
- (RBIdx - AArch64::PartialMappingIdx::PMI_Min +
- getRegBankBaseIdxOffset(Size)) *
- ValueMappingIdx::DistanceBetweenRegBanks;
- assert(ValMappingIdx >= AArch64::First3OpsIdx &&
- ValMappingIdx <= AArch64::Last3OpsIdx && "Mapping out of bound");
+ unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
+ if (BaseIdxOffset == -1u)
+ return &ValMappings[InvalidIdx];
+
+ unsigned ValMappingIdx =
+ First3OpsIdx + (RBIdx - PartialMappingIdx::PMI_Min + BaseIdxOffset) *
+ ValueMappingIdx::DistanceBetweenRegBanks;
+ assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx &&
+ "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
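A short worked example of the index arithmetic, using values read off the tables above (First3OpsIdx == 1 per the table comment; the FPR-before-GPR enum order is assumed from the PartMappings layout):

  //   getValueMapping(PMI_FirstFPR, 64) -> 1 + (0 + 1) * 3 = 4   // "4: FPR 64-bit value"
  //   getValueMapping(PMI_FirstGPR, 64) -> 1 + (5 + 1) * 3 = 19  // "19: GPR 64-bit value"
  // A size with no mapping (e.g. a 128-bit GPR request) makes
  // getRegBankBaseIdxOffset() return -1u, and the function hands back
  // &ValMappings[InvalidIdx] instead of asserting.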
-/// Get the pointer to the ValueMapping of the operands of a copy
-/// instruction from a GPR or FPR register to a GPR or FPR register
-/// with a size of \p Size.
-///
-/// If \p DstIsGPR is true, the destination of the copy is on GPR,
-/// otherwise it is on FPR. Same thing for \p SrcIsGPR.
+AArch64GenRegisterBankInfo::PartialMappingIdx
+ AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{
+ PMI_None, // CCR
+ PMI_FirstFPR, // FPR
+ PMI_FirstGPR, // GPR
+ };
+
const RegisterBankInfo::ValueMapping *
-getCopyMapping(bool DstIsGPR, bool SrcIsGPR, unsigned Size) {
- PartialMappingIdx DstRBIdx = DstIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
- PartialMappingIdx SrcRBIdx = SrcIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
+AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
+ unsigned SrcBankID, unsigned Size) {
+ assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+ assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+ PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+ PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+ assert(DstRBIdx != PMI_None && "No such mapping");
+ assert(SrcRBIdx != PMI_None && "No such mapping");
+
if (DstRBIdx == SrcRBIdx)
return getValueMapping(DstRBIdx, Size);
+
assert(Size <= 64 && "GPR cannot handle that size");
unsigned ValMappingIdx =
FirstCrossRegCpyIdx +
- (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(Size)) *
+ (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(DstRBIdx, Size)) *
ValueMappingIdx::DistanceBetweenCrossRegCpy;
- assert(ValMappingIdx >= AArch64::FirstCrossRegCpyIdx &&
- ValMappingIdx <= AArch64::LastCrossRegCpyIdx &&
- "Mapping out of bound");
+ assert(ValMappingIdx >= FirstCrossRegCpyIdx &&
+ ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
-
-} // End AArch64 namespace.
} // End llvm namespace.
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 3099383e5b32..ae01ea477bb9 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -328,11 +328,52 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
+/// \brief Determine whether it is worth it to fold SHL into the addressing
+/// mode.
+static bool isWorthFoldingSHL(SDValue V) {
+ assert(V.getOpcode() == ISD::SHL && "invalid opcode");
+ // It is worth folding logical shift of up to three places.
+ auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (!CSD)
+ return false;
+ unsigned ShiftVal = CSD->getZExtValue();
+ if (ShiftVal > 3)
+ return false;
+
+ // Check if this particular node is reused in any non-memory related
+ // operation. If yes, do not try to fold this node into the address
+ // computation, since the computation will be kept.
+ const SDNode *Node = V.getNode();
+ for (SDNode *UI : Node->uses())
+ if (!isa<MemSDNode>(*UI))
+ for (SDNode *UII : UI->uses())
+ if (!isa<MemSDNode>(*UII))
+ return false;
+ return true;
+}
+
/// \brief Determine whether it is worth to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // it hurts if the value is used at least twice, unless we are optimizing
- // for code size.
- return ForCodeSize || V.hasOneUse();
+ // Trivial if we are optimizing for code size or if there is only
+ // one use of the value.
+ if (ForCodeSize || V.hasOneUse())
+ return true;
+ // If a subtarget has a fastpath LSL we can fold a logical shift into
+ // the addressing mode and save a cycle.
+ if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
+ isWorthFoldingSHL(V))
+ return true;
+ if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
+ const SDValue LHS = V.getOperand(0);
+ const SDValue RHS = V.getOperand(1);
+ if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
+ return true;
+ if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
+ return true;
+ }
+
+ // It hurts otherwise, since the value will be reused.
+ return false;
}
/// SelectShiftedRegister - Select a "shifted register" operand. If the value
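An illustrative case the LSLFast path enables (a sketch, not from the patch): when one shifted index feeds several memory accesses, V.hasOneUse() is false and the old heuristic refused the fold, but isWorthFoldingSHL() still accepts it because the shift is only reused, directly or through the address computation, by memory accesses.

  //   %off = shl i64 %i, 3
  //   %a   = load i64 from (%p + %off)
  //   %b   = load i64 from (%q + %off)
  // On a subtarget with hasLSLFast() both loads can keep the shift in the
  // addressing mode:
  //   ldr x0, [x1, x2, lsl #3]
  //   ldr x3, [x4, x2, lsl #3]
  // rather than paying for a separate  lsl x2, x2, #3.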
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 849058bdfbdb..0d3289ac84c3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -554,8 +555,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setSchedulingPreference(Sched::Hybrid);
- // Enable TBZ/TBNZ
- MaskAndBranchFoldingIsLegal = true;
EnableExtLdPromotion = true;
// Set required alignment.
@@ -793,7 +792,7 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
/// KnownZero/KnownOne bitsets.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, APInt &KnownZero, APInt &KnownOne,
- const SelectionDAG &DAG, unsigned Depth) const {
+ const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
@@ -2113,8 +2112,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
@@ -2124,8 +2123,9 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2231,19 +2231,13 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND)
- return true;
- if (isExtendedBUILD_VECTOR(N, DAG, true))
- return true;
- return false;
+ return N->getOpcode() == ISD::SIGN_EXTEND ||
+ isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::ZERO_EXTEND)
- return true;
- if (isExtendedBUILD_VECTOR(N, DAG, false))
- return true;
- return false;
+ return N->getOpcode() == ISD::ZERO_EXTEND ||
+ isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -3578,7 +3572,7 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
+ assert(Subtarget->useSmallAddressing() &&
"ELF TLS only supported in small memory model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
@@ -3679,7 +3673,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
- else if (Subtarget->isTargetELF())
+ if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
@@ -4516,7 +4510,12 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
+ .Case("x18", AArch64::X18)
+ .Case("w18", AArch64::W18)
.Default(0);
+ if ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
+ !Subtarget->isX18Reserved())
+ Reg = 0;
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
@@ -6591,21 +6590,20 @@ FailedModImm:
if (!isConstant && !usesOnlyOneValue) {
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
- unsigned ElemSize = VT.getScalarSizeInBits();
unsigned i = 0;
- // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
+
+ // Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
- // value is already in an S or D register.
- // Do not do this for UNDEF/LOAD nodes because we have better patterns
- // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR.
- if (!Op0.isUndef() && Op0.getOpcode() != ISD::LOAD &&
- (ElemSize == 32 || ElemSize == 64)) {
- unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
- MachineSDNode *N =
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
- DAG.getTargetConstant(SubIdx, dl, MVT::i32));
- Vec = SDValue(N, 0);
+ // value is already in an S or D register, and we're forced to emit an
+ // INSERT_SUBREG that we can't fold anywhere.
+ //
+ // We also allow types like i8 and i16 which are illegal scalar but legal
+ // vector element types. After type-legalization the inserted value is
+ // extended (i32) and it is safe to cast them to the vector type by ignoring
+ // the upper bits of the lowest lane (e.g. v8i8, v4i16).
+ if (!Op0.isUndef()) {
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
for (; i < NumElts; ++i) {
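Sketch of the intended lowering for a non-constant v4i16 BUILD_VECTOR whose first operand is defined (node names are illustrative):

  //   t0 = SCALAR_TO_VECTOR elt0          // lane 0, no read of a prior vector
  //   t1 = INSERT_VECTOR_ELT t0, elt1, 1  // built by the loop that follows
  //   t2 = INSERT_VECTOR_ELT t1, elt2, 2
  //   t3 = INSERT_VECTOR_ELT t2, elt3, 3
  // After type legalization the i16 elements are i32, and the upper bits of
  // lane 0 are simply ignored, which is what the comment above relies on.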
@@ -7249,6 +7247,33 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
return NumBits == 32 || NumBits == 64;
}
+/// A helper function for determining the number of interleaved accesses we
+/// will generate when lowering accesses of the given type.
+unsigned
+AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const {
+ return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+}
+
+bool AArch64TargetLowering::isLegalInterleavedAccessType(
+ VectorType *VecTy, const DataLayout &DL) const {
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+ // Ensure the number of vector elements is greater than 1.
+ if (VecTy->getNumElements() < 2)
+ return false;
+
+ // Ensure the element type is legal.
+ if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
+ return false;
+
+ // Ensure the total vector size is 64 or a multiple of 128. Types larger than
+ // 128 will be split into multiple interleaved accesses.
+ return VecSize == 64 || VecSize % 128 == 0;
+}
+
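A compile-and-run sketch of the same arithmetic, handy for checking which vector types pass the two tests above (the helper names here are mine, not LLVM's):

  #include <cstdio>

  static unsigned numInterleavedAccesses(unsigned VecBits) {
    return (VecBits + 127) / 128;            // same rounding as above
  }

  static bool isLegalInterleavedAccessType(unsigned NumElts, unsigned EltBits) {
    if (NumElts < 2)
      return false;                          // need at least two elements
    if (EltBits != 8 && EltBits != 16 && EltBits != 32 && EltBits != 64)
      return false;                          // element type must be legal
    unsigned VecBits = NumElts * EltBits;
    return VecBits == 64 || VecBits % 128 == 0;
  }

  int main() {
    printf("<8 x i8>  : legal=%d accesses=%u\n",
           isLegalInterleavedAccessType(8, 8), numInterleavedAccesses(64));
    printf("<16 x i32>: legal=%d accesses=%u\n",
           isLegalInterleavedAccessType(16, 32), numInterleavedAccesses(512));
    printf("<3 x i32> : legal=%d\n", isLegalInterleavedAccessType(3, 32));
  }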
/// \brief Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -7272,12 +7297,15 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VecTy = Shuffles[0]->getType();
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- // Skip if we do not have NEON and skip illegal vector types.
- if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128))
+ // Skip if we do not have NEON and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // their sizes are divisible by 128.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
+ unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = VecTy->getVectorElementType();
@@ -7285,6 +7313,25 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ IRBuilder<> Builder(LI);
+
+ // The base address of the load.
+ Value *BaseAddr = LI->getPointerOperand();
+
+ if (NumLoads > 1) {
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ VecTy = VectorType::get(VecTy->getVectorElementType(),
+ VecTy->getVectorNumElements() / NumLoads);
+
+ // We will compute the pointer operand of each load from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, VecTy->getVectorElementType()->getPointerTo(
+ LI->getPointerAddressSpace()));
+ }
+
Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[2] = {VecTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
@@ -7293,39 +7340,49 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
Function *LdNFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- IRBuilder<> Builder(LI);
- Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+ // Holds sub-vectors extracted from the load intrinsic return values. The
+ // sub-vectors are associated with the shufflevector instructions they will
+ // replace.
+ DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
- CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
+ for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
- // Replace uses of each shufflevector with the corresponding vector loaded
- // by ldN.
- for (unsigned i = 0; i < Shuffles.size(); i++) {
- ShuffleVectorInst *SVI = Shuffles[i];
- unsigned Index = Indices[i];
+ // If we're generating more than one load, compute the base address of
+ // subsequent loads as an offset from the previous.
+ if (LoadCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(
+ BaseAddr, VecTy->getVectorNumElements() * Factor);
- Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+ CallInst *LdN = Builder.CreateCall(
+ LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
- // Convert the integer vector to pointer vector if the element is pointer.
- if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
+ // Extract and store the sub-vectors returned by the load intrinsic.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SVI = Shuffles[i];
+ unsigned Index = Indices[i];
- SVI->replaceAllUsesWith(SubVec);
- }
+ Value *SubVec = Builder.CreateExtractValue(LdN, Index);
- return true;
-}
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
-/// \brief Get a mask consisting of sequential integers starting from \p Start.
-///
-/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
-static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
- unsigned NumElts) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < NumElts; i++)
- Mask.push_back(Builder.getInt32(Start + i));
+ SubVecs[SVI].push_back(SubVec);
+ }
+ }
+
+ // Replace uses of the shufflevector instructions with the sub-vectors
+ // returned by the load intrinsic. If a shufflevector instruction is
+ // associated with more than one sub-vector, those sub-vectors will be
+ // concatenated into a single wide vector.
+ for (ShuffleVectorInst *SVI : Shuffles) {
+ auto &SubVec = SubVecs[SVI];
+ auto *WideVec =
+ SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+ SVI->replaceAllUsesWith(WideVec);
+ }
- return ConstantVector::get(Mask);
+ return true;
}
/// \brief Lower an interleaved store into a stN intrinsic.
@@ -7369,12 +7426,15 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- // Skip if we do not have NEON and skip illegal vector types.
- if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128))
+ // Skip if we do not have NEON and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // their sizes are divisible by 128.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
@@ -7394,6 +7454,25 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, LaneLen);
}
+ // The base address of the store.
+ Value *BaseAddr = SI->getPointerOperand();
+
+ if (NumStores > 1) {
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+
+ // We will compute the pointer operand of each store from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
+ SI->getPointerAddressSpace()));
+ }
+
+ auto Mask = SVI->getShuffleMask();
+
Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
Type *Tys[2] = {SubVecTy, PtrTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
@@ -7402,34 +7481,43 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
Function *StNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
- SmallVector<Value *, 5> Ops;
+ for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
- // Split the shufflevector operands into sub vectors for the new stN call.
- auto Mask = SVI->getShuffleMask();
- for (unsigned i = 0; i < Factor; i++) {
- if (Mask[i] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
- } else {
- unsigned StartMask = 0;
- for (unsigned j = 1; j < LaneLen; j++) {
- if (Mask[j*Factor + i] >= 0) {
- StartMask = Mask[j*Factor + i] - j;
- break;
+ SmallVector<Value *, 5> Ops;
+
+ // Split the shufflevector operands into sub vectors for the new stN call.
+ for (unsigned i = 0; i < Factor; i++) {
+ unsigned IdxI = StoreCount * LaneLen * Factor + i;
+ if (Mask[IdxI] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ unsigned IdxJ = StoreCount * LaneLen * Factor + j;
+ if (Mask[IdxJ * Factor + IdxI] >= 0) {
+ StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+ break;
+ }
}
+ // Note: Filling undef gaps with random elements is ok, since
+ // those elements were being written anyway (with undefs).
+ // In the case of all undefs we're defaulting to using elems from 0
+ // Note: StartMask cannot be negative, it's checked in
+ // isReInterleaveMask
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
- // Note: If all elements in a chunk are undefs, StartMask=0!
- // Note: Filling undef gaps with random elements is ok, since
- // those elements were being written anyway (with undefs).
- // In the case of all undefs we're defaulting to using elems from 0
- // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
}
- }
- Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
- Builder.CreateCall(StNFunc, Ops);
+ // If we're generating more than one store, we compute the base address of
+ // subsequent stores as an offset from the previous.
+ if (StoreCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
+
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
+ Builder.CreateCall(StNFunc, Ops);
+ }
return true;
}
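
As a rough illustration of the splitting arithmetic used in the load and store paths above (a sketch with made-up values, not code from the patch): each extra ldN/stN call advances the base pointer by LaneLen * Factor scalar elements (NumElements * Factor on the load side).

    #include <cstdio>

    int main() {
      // Hypothetical factor-2 interleaved store of v8i32 sub-vectors (256 bits each).
      unsigned Factor = 2;
      unsigned LaneLen = 8;                              // lanes per sub-vector
      unsigned NumStores = (LaneLen * 32 + 127) / 128;   // 256-bit sub-vectors -> 2
      LaneLen /= NumStores;                              // each st2 now handles v4i32
      for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount)
        printf("st2 #%u at element offset %u\n", StoreCount,
               StoreCount * LaneLen * Factor);           // prints 0, then 8
      return 0;
    }
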
@@ -7690,7 +7778,7 @@ SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -9267,7 +9355,7 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
- /// This function handles the log2-shuffle pattern produced by the
+/// This function handles the log2-shuffle pattern produced by the
/// LoopVectorizer for the across vector reduction. It consists of
/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
/// are reduced, where s is an induction variable from 0 to
@@ -10483,9 +10571,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
- if (!Subtarget->isTargetAndroid())
- return true;
- return TargetLowering::useLoadStackGuardNode();
+ if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
+ return TargetLowering::useLoadStackGuardNode();
+ return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
@@ -10623,36 +10711,56 @@ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
return false;
}
-Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
- if (!Subtarget->isTargetAndroid())
- return TargetLowering::getIRStackGuard(IRB);
-
- // Android provides a fixed TLS slot for the stack cookie. See the definition
- // of TLS_SLOT_STACK_GUARD in
- // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- const unsigned TlsOffset = 0x28;
+static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
+ IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
}
-Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
- if (!Subtarget->isTargetAndroid())
- return TargetLowering::getSafeStackPointerLocation(IRB);
+Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
+ // Android provides a fixed TLS slot for the stack cookie. See the definition
+ // of TLS_SLOT_STACK_GUARD in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ if (Subtarget->isTargetAndroid())
+ return UseTlsOffset(IRB, 0x28);
+ // Fuchsia is similar.
+ // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
+ if (Subtarget->isTargetFuchsia())
+ return UseTlsOffset(IRB, -0x10);
+
+ return TargetLowering::getIRStackGuard(IRB);
+}
+
+Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- const unsigned TlsOffset = 0x48;
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- Function *ThreadPointerFunc =
- Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
- return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
- Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+ if (Subtarget->isTargetAndroid())
+ return UseTlsOffset(IRB, 0x48);
+
+ // Fuchsia is similar.
+ // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
+ if (Subtarget->isTargetFuchsia())
+ return UseTlsOffset(IRB, -0x8);
+
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+}
+
+bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ // Only sink 'and' mask to cmp use block if it is masking a single bit, since
+ // this is likely to fold the and/cmp/br into a single tbz instruction. It
+ // may be beneficial to sink in other cases, but we would have to check that
+ // the cmp would not get folded into the br to form a cbz for these to be
+ // beneficial.
+ ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ if (!Mask)
+ return false;
+ return Mask->getUniqueInteger().isPowerOf2();
}
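
The single-bit test above is the usual power-of-two check; a standalone sketch with illustrative names:

    #include <cassert>
    #include <cstdint>

    // A mask folds into tbz/tbnz only when exactly one bit is set.
    bool isSingleBitMask(uint64_t Mask) {
      return Mask != 0 && (Mask & (Mask - 1)) == 0;
    }

    int main() {
      assert(isSingleBitMask(0x8));   // and w0, w0, #8; cbz  ->  tbz w0, #3
      assert(!isSingleBitMask(0x6));  // two bits set: still needs and + cbz
      return 0;
    }
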
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
@@ -10702,7 +10810,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
}
}
-bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
@@ -10711,6 +10819,14 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize =
- Attr.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
return OptSize && !VT.isVector();
}
+
+unsigned
+AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
+ if (Subtarget->isTargetDarwin())
+ return getPointerTy(DL).getSizeInBits();
+
+ return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
+}
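
For reference, the two results above correspond to Darwin's single-pointer va_list versus the AAPCS64 struct (three pointers plus two 32-bit offsets); a sketch of the arithmetic, assuming a 64-bit pointer size:

    #include <cstdio>

    unsigned vaListSizeInBits(bool IsDarwin, unsigned PtrBits = 64) {
      if (IsDarwin)
        return PtrBits;              // va_list is a single pointer
      return 3 * PtrBits + 2 * 32;   // __stack, __gr_top, __vr_top, __gr_offs, __vr_offs
    }

    int main() {
      printf("Darwin: %u bits, AAPCS64: %u bits\n",
             vaListSizeInBits(true), vaListSizeInBits(false)); // 64 and 256
      return 0;
    }
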
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 054ccc31674f..2ad6c8b23df8 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -251,7 +251,8 @@ public:
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
- APInt &KnownOne, const SelectionDAG &DAG,
+ APInt &KnownOne, const APInt &DemandedElts,
+ const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
@@ -402,7 +403,7 @@ public:
return AArch64::X1;
}
- bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isCheapToSpeculateCttz() const override {
return true;
@@ -412,6 +413,8 @@ public:
return true;
}
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
bool hasAndNotCompare(SDValue) const override {
// 'bics'
return true;
@@ -435,6 +438,20 @@ public:
return true;
}
+ /// Returns the size of the platform's va_list object.
+ unsigned getVaListSizeInBits(const DataLayout &DL) const override;
+
+ /// Returns true if \p VecTy is a legal interleaved access type. This
+ /// function checks the vector element type and the overall width of the
+ /// vector.
+ bool isLegalInterleavedAccessType(VectorType *VecTy,
+ const DataLayout &DL) const;
+
+ /// Returns the number of interleaved accesses that will be generated when
+ /// lowering accesses of the given type.
+ unsigned getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const;
+
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index cefdf51b50d2..16be4432b160 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -39,6 +39,9 @@ class AArch64Inst<Format f, string cstr> : Instruction {
let Constraints = cstr;
}
+class InstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[UseNegativeImmediates]>;
+
// Pseudo instructions (don't have encoding information)
class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
: AArch64Inst<PseudoFrm, cstr> {
@@ -257,6 +260,7 @@ def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
class AsmImmRange<int Low, int High> : AsmOperandClass {
let Name = "Imm" # Low # "_" # High;
let DiagnosticType = "InvalidImm" # Low # "_" # High;
+ let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
}
def Imm1_8Operand : AsmImmRange<1, 8>;
@@ -264,6 +268,20 @@ def Imm1_16Operand : AsmImmRange<1, 16>;
def Imm1_32Operand : AsmImmRange<1, 32>;
def Imm1_64Operand : AsmImmRange<1, 64>;
+class BranchTarget<int N> : AsmOperandClass {
+ let Name = "BranchTarget" # N;
+ let DiagnosticType = "InvalidLabel";
+ let PredicateMethod = "isBranchTarget<" # N # ">";
+}
+
+class PCRelLabel<int N> : BranchTarget<N> {
+ let Name = "PCRelLabel" # N;
+}
+
+def BranchTarget14Operand : BranchTarget<14>;
+def BranchTarget26Operand : BranchTarget<26>;
+def PCRelLabel19Operand : PCRelLabel<19>;
+
def MovZSymbolG3AsmOperand : AsmOperandClass {
let Name = "MovZSymbolG3";
let RenderMethod = "addImmOperands";
@@ -500,7 +518,8 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
}
// imm0_255 predicate - True if the immediate is in the range [0,255].
-def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; }
+def Imm0_255Operand : AsmImmRange<0,255>;
+
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 256;
}]> {
@@ -673,6 +692,14 @@ def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>;
def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>;
+def gi_addsub_shifted_imm32 :
+ GIComplexOperandMatcher<s32, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm32>;
+
+def gi_addsub_shifted_imm64 :
+ GIComplexOperandMatcher<s64, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm64>;
+
class neg_addsub_shifted_imm<ValueType Ty>
: Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
let PrintMethod = "printAddSubImm";
@@ -1094,10 +1121,6 @@ def inv_ccode : Operand<i32> {
// Conditional branch target. 19-bit immediate. The low two bits of the target
// offset are implied zero and so are not part of the immediate.
-def PCRelLabel19Operand : AsmOperandClass {
- let Name = "PCRelLabel19";
- let DiagnosticType = "InvalidLabel";
-}
def am_brcond : Operand<OtherVT> {
let EncoderMethod = "getCondBranchTargetOpValue";
let DecoderMethod = "DecodePCRelLabel19";
@@ -1154,9 +1177,6 @@ multiclass CmpBranch<bit op, string asm, SDNode node> {
//---
// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of
// the target offset are implied zero and so are not part of the immediate.
-def BranchTarget14Operand : AsmOperandClass {
- let Name = "BranchTarget14";
-}
def am_tbrcond : Operand<OtherVT> {
let EncoderMethod = "getTestBranchTargetOpValue";
let PrintMethod = "printAlignedLabel";
@@ -1166,11 +1186,12 @@ def am_tbrcond : Operand<OtherVT> {
// AsmOperand classes to emit (or not) special diagnostics
def TBZImm0_31Operand : AsmOperandClass {
let Name = "TBZImm0_31";
- let PredicateMethod = "isImm0_31";
+ let PredicateMethod = "isImmInRange<0,31>";
let RenderMethod = "addImm0_31Operands";
}
def TBZImm32_63Operand : AsmOperandClass {
let Name = "Imm32_63";
+ let PredicateMethod = "isImmInRange<32,63>";
let DiagnosticType = "InvalidImm0_63";
}
@@ -1232,10 +1253,6 @@ multiclass TestBranch<bit op, string asm, SDNode node> {
//---
// Unconditional branch (immediate) instructions.
//---
-def BranchTarget26Operand : AsmOperandClass {
- let Name = "BranchTarget26";
- let DiagnosticType = "InvalidLabel";
-}
def am_b_target : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
let PrintMethod = "printAlignedLabel";
@@ -1784,10 +1801,10 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
}
// add Rd, Rb, -imm -> sub Rd, Rn, imm
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1859,10 +1876,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
} // Defs = [NZCV]
// Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1883,9 +1900,9 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
// Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
- def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
// Compare shorthands
@@ -2100,10 +2117,10 @@ multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
let Inst{31} = 1;
}
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2122,10 +2139,10 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
}
} // end Defs = [NZCV]
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2454,7 +2471,7 @@ class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
// Load literal address: 19-bit immediate. The low two bits of the target
// offset are implied zero and so are not part of the immediate.
-def am_ldrlit : Operand<OtherVT> {
+def am_ldrlit : Operand<iPTR> {
let EncoderMethod = "getLoadLiteralOpValue";
let DecoderMethod = "DecodePCRelLabel19";
let PrintMethod = "printAlignedLabel";
@@ -9060,7 +9077,7 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
//----------------------------------------------------------------------------
-let Predicates = [HasNEON, HasV8_1a] in {
+let Predicates = [HasNEON, HasRDM] in {
class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand regtype, string asm,
@@ -9221,7 +9238,7 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
let Inst{21} = idx{0};
}
}
-} // let Predicates = [HasNeon, HasV8_1a]
+} // let Predicates = [HasNeon, HasRDM]
//----------------------------------------------------------------------------
// Crypto extensions
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 4c789926e3e4..41fc8eceab5c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
@@ -369,7 +370,7 @@ void AArch64InstrInfo::instantiateCondBranch(
// Folded compare-and-branch
// Note that we use addOperand instead of addReg to keep the flags.
const MachineInstrBuilder MIB =
- BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
+ BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
if (Cond.size() > 3)
MIB.addImm(Cond[3].getImm());
MIB.addMBB(TBB);
@@ -762,6 +763,17 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
+bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const {
+ if (MI.getNumOperands() < 4)
+ return false;
+ unsigned ShOpVal = MI.getOperand(3).getImm();
+ unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal);
+ if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL &&
+ ShImm < 4)
+ return true;
+ return false;
+}
+
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -1299,16 +1311,16 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addMemOperand(*MI.memoperands_begin());
} else if (TM.getCodeModel() == CodeModel::Large) {
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
@@ -1345,14 +1357,6 @@ bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
case AArch64::BICSXrs:
case AArch64::BICWrs:
case AArch64::BICXrs:
- case AArch64::CRC32Brr:
- case AArch64::CRC32CBrr:
- case AArch64::CRC32CHrr:
- case AArch64::CRC32CWrr:
- case AArch64::CRC32CXrr:
- case AArch64::CRC32Hrr:
- case AArch64::CRC32Wrr:
- case AArch64::CRC32Xrr:
case AArch64::EONWrs:
case AArch64::EONXrs:
case AArch64::EORWrs:
@@ -1691,16 +1695,59 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
} else
return false;
- // Offset is calculated as the immediate operand multiplied by the scaling factor.
- // Unscaled instructions have scaling factor set to 1.
+ // Get the scaling factor for the instruction and set the width for the
+ // instruction.
unsigned Scale = 0;
- switch (LdSt.getOpcode()) {
+ int64_t Dummy1, Dummy2;
+
+ // If this returns false, then it's an instruction we don't want to handle.
+ if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
+ return false;
+
+ // Compute the offset. Offset is calculated as the immediate operand
+ // multiplied by the scaling factor. Unscaled instructions have scaling factor
+ // set to 1.
+ if (LdSt.getNumExplicitOperands() == 3) {
+ BaseReg = LdSt.getOperand(1).getReg();
+ Offset = LdSt.getOperand(2).getImm() * Scale;
+ } else {
+ assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
+ BaseReg = LdSt.getOperand(2).getReg();
+ Offset = LdSt.getOperand(3).getImm() * Scale;
+ }
+ return true;
+}
+
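
The offset returned above is simply the encoded immediate scaled by the per-opcode factor from getMemOpInfo (1 for the unscaled LDUR/STUR forms); a tiny sketch with illustrative values:

    #include <cassert>
    #include <cstdint>

    int64_t byteOffset(int64_t EncodedImm, unsigned Scale) {
      return EncodedImm * Scale;         // unscaled forms use Scale == 1
    }

    int main() {
      assert(byteOffset(3, 8) == 24);    // e.g. an LDRXui-style scaled immediate
      assert(byteOffset(-16, 1) == -16); // e.g. an LDURXi-style unscaled immediate
      return 0;
    }
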
+MachineOperand&
+AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+ MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1);
+ assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
+ return OfsOp;
+}
+
+bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
+ unsigned &Width, int64_t &MinOffset,
+ int64_t &MaxOffset) const {
+ switch (Opcode) {
+ // Not a memory operation, or not one we want to handle.
default:
+ Scale = Width = 0;
+ MinOffset = MaxOffset = 0;
return false;
+ case AArch64::STRWpost:
+ case AArch64::LDRWpost:
+ Width = 32;
+ Scale = 4;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
case AArch64::LDURQi:
case AArch64::STURQi:
Width = 16;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURXi:
case AArch64::LDURDi:
@@ -1708,6 +1755,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURDi:
Width = 8;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURWi:
case AArch64::LDURSi:
@@ -1716,6 +1765,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURSi:
Width = 4;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURHi:
case AArch64::LDURHHi:
@@ -1725,6 +1776,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURHHi:
Width = 2;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURBi:
case AArch64::LDURBBi:
@@ -1734,6 +1787,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURBBi:
Width = 1;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDPQi:
case AArch64::LDNPQi:
@@ -1741,10 +1796,14 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPQi:
Scale = 16;
Width = 32;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = Width = 16;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDPXi:
case AArch64::LDPDi:
@@ -1756,12 +1815,16 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPDi:
Scale = 8;
Width = 16;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
Scale = Width = 8;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDPWi:
case AArch64::LDPSi:
@@ -1773,6 +1836,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPSi:
Scale = 4;
Width = 8;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRWui:
case AArch64::LDRSui:
@@ -1780,29 +1845,27 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STRWui:
case AArch64::STRSui:
Scale = Width = 4;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDRHui:
case AArch64::LDRHHui:
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = Width = 2;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDRBui:
case AArch64::LDRBBui:
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = Width = 1;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
}
- if (LdSt.getNumExplicitOperands() == 3) {
- BaseReg = LdSt.getOperand(1).getReg();
- Offset = LdSt.getOperand(2).getImm() * Scale;
- } else {
- assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
- BaseReg = LdSt.getOperand(2).getReg();
- Offset = LdSt.getOperand(3).getImm() * Scale;
- }
return true;
}
@@ -1903,88 +1966,6 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return Offset1 + 1 == Offset2;
}
-bool AArch64InstrInfo::shouldScheduleAdjacent(
- const MachineInstr &First, const MachineInstr &Second) const {
- if (Subtarget.hasArithmeticBccFusion()) {
- // Fuse CMN, CMP, TST followed by Bcc.
- unsigned SecondOpcode = Second.getOpcode();
- if (SecondOpcode == AArch64::Bcc) {
- switch (First.getOpcode()) {
- default:
- return false;
- case AArch64::ADDSWri:
- case AArch64::ADDSWrr:
- case AArch64::ADDSXri:
- case AArch64::ADDSXrr:
- case AArch64::ANDSWri:
- case AArch64::ANDSWrr:
- case AArch64::ANDSXri:
- case AArch64::ANDSXrr:
- case AArch64::SUBSWri:
- case AArch64::SUBSWrr:
- case AArch64::SUBSXri:
- case AArch64::SUBSXrr:
- case AArch64::BICSWrr:
- case AArch64::BICSXrr:
- return true;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return !hasShiftedReg(Second);
- }
- }
- }
- if (Subtarget.hasArithmeticCbzFusion()) {
- // Fuse ALU operations followed by CBZ/CBNZ.
- unsigned SecondOpcode = Second.getOpcode();
- if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
- SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
- switch (First.getOpcode()) {
- default:
- return false;
- case AArch64::ADDWri:
- case AArch64::ADDWrr:
- case AArch64::ADDXri:
- case AArch64::ADDXrr:
- case AArch64::ANDWri:
- case AArch64::ANDWrr:
- case AArch64::ANDXri:
- case AArch64::ANDXrr:
- case AArch64::EORWri:
- case AArch64::EORWrr:
- case AArch64::EORXri:
- case AArch64::EORXrr:
- case AArch64::ORRWri:
- case AArch64::ORRWrr:
- case AArch64::ORRXri:
- case AArch64::ORRXrr:
- case AArch64::SUBWri:
- case AArch64::SUBWrr:
- case AArch64::SUBXri:
- case AArch64::SUBXrr:
- return true;
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return !hasShiftedReg(Second);
- }
- }
- }
- return false;
-}
-
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
const MDNode *Expr, const DebugLoc &DL) const {
@@ -3793,7 +3774,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
.addReg(ZeroReg)
- .addOperand(Root.getOperand(2));
+ .add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -4286,3 +4267,199 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_TLS, "aarch64-tls"}};
return makeArrayRef(TargetFlags);
}
+
+unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
+ size_t Occurrences,
+ bool CanBeTailCall) const {
+ unsigned NotOutlinedSize = SequenceSize * Occurrences;
+ unsigned OutlinedSize;
+
+ // Is this candidate something we can outline as a tail call?
+ if (CanBeTailCall) {
+ // If yes, then we just outline the sequence and replace each of its
+ // occurrences with a branch instruction.
+ OutlinedSize = SequenceSize + Occurrences;
+ } else {
+ // If no, then we outline the sequence (SequenceSize), add a return (+1),
+ // and replace each occurrence with a save/restore to LR and a call
+ // (3 * Occurrences)
+ OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
+ }
+
+ // Return the number of instructions saved by outlining this sequence.
+ return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+}
+
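
A standalone restatement of the cost model above, in instruction counts (names and example numbers are illustrative):

    #include <cstdio>

    unsigned outliningBenefit(unsigned SequenceSize, unsigned Occurrences,
                              bool CanBeTailCall) {
      unsigned NotOutlinedSize = SequenceSize * Occurrences;
      unsigned OutlinedSize =
          CanBeTailCall ? SequenceSize + Occurrences            // body + one B per site
                        : (SequenceSize + 1) + 3 * Occurrences; // body + RET; save/BL/restore per site
      return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
    }

    int main() {
      printf("%u\n", outliningBenefit(10, 4, false)); // 40 vs 23: saves 17 instructions
      printf("%u\n", outliningBenefit(2, 2, false));  // 4 vs 9: 0, not worth outlining
      return 0;
    }
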
+bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
+ return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+}
+
+AArch64GenInstrInfo::MachineOutlinerInstrType
+AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
+
+ // Don't outline LOHs.
+ if (FuncInfo->getLOHRelated().count(&MI))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Don't allow debug values to impact outlining type.
+ if (MI.isDebugValue() || MI.isIndirectDebugValue())
+ return MachineOutlinerInstrType::Invisible;
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+
+ // Is this the end of a function?
+ if (MI.getParent()->succ_empty())
+ return MachineOutlinerInstrType::Legal;
+
+ // It's not, so don't outline it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ // Don't outline positions.
+ if (MI.isPosition())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Make sure none of the operands are un-outlinable.
+ for (const MachineOperand &MOP : MI.operands())
+ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+ MOP.isTargetIndex())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Don't outline anything that uses the link register.
+ if (MI.modifiesRegister(AArch64::LR, &RI) ||
+ MI.readsRegister(AArch64::LR, &RI))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Does this use the stack?
+ if (MI.modifiesRegister(AArch64::SP, &RI) ||
+ MI.readsRegister(AArch64::SP, &RI)) {
+
+ // Is it a memory operation?
+ if (MI.mayLoadOrStore()) {
+ unsigned Base; // Filled with the base register of MI.
+ int64_t Offset; // Filled with the offset of MI.
+ unsigned DummyWidth;
+
+ // Does it allow us to offset the base register and is the base SP?
+ if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
+ Base != AArch64::SP)
+ return MachineOutlinerInstrType::Illegal;
+
+ // Find the minimum/maximum offset for this instruction and check if
+ // fixing it up would be in range.
+ int64_t MinOffset, MaxOffset;
+ unsigned DummyScale;
+ getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
+ MaxOffset);
+
+ // TODO: We should really test what happens if an instruction overflows.
+ // This is tricky to test with IR tests, but when the outliner is moved
+ // to a MIR test, it really ought to be checked.
+ if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
+ return MachineOutlinerInstrType::Illegal;
+
+ // It's in range, so we can outline it.
+ return MachineOutlinerInstrType::Legal;
+ }
+
+ // We can't fix it up, so don't outline it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ return MachineOutlinerInstrType::Legal;
+}
+
+void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+ for (MachineInstr &MI : MBB) {
+ unsigned Base, Width;
+ int64_t Offset;
+
+ // Is this a load or store with an immediate offset with SP as the base?
+ if (!MI.mayLoadOrStore() ||
+ !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
+ Base != AArch64::SP)
+ continue;
+
+ // It is, so we have to fix it up.
+ unsigned Scale;
+ int64_t Dummy1, Dummy2;
+
+ MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
+ assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
+ getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
+ assert(Scale != 0 && "Unexpected opcode!");
+
+ // We've pushed the return address to the stack, so add 16 to the offset.
+ // This is safe, since we already checked if it would overflow when we
+ // checked if this instruction was legal to outline.
+ int64_t NewImm = (Offset + 16)/Scale;
+ StackOffsetOperand.setImm(NewImm);
+ }
+}
+
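
Since the non-tail-call path below pushes LR with a pre-indexed store of -16, every SP-relative scaled offset inside the outlined body is rebiased by 16 bytes; a sketch of that rewrite:

    #include <cassert>
    #include <cstdint>

    int64_t fixupScaledOffset(int64_t ByteOffset, unsigned Scale) {
      return (ByteOffset + 16) / Scale;   // new encoded immediate
    }

    int main() {
      // ldr x0, [sp, #8] (Scale = 8) becomes ldr x0, [sp, #24], i.e. immediate 3.
      assert(fixupScaledOffset(8, 8) == 3);
      return 0;
    }
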
+void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+
+ // If this is a tail call outlined function, then there's already a return.
+ if (IsTailCall)
+ return;
+
+ // It's not a tail call, so we have to insert the return ourselves.
+ MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
+ .addReg(AArch64::LR, RegState::Undef);
+ MBB.insert(MBB.end(), ret);
+
+ // Walk over the basic block and fix up all the stack accesses.
+ fixupPostOutline(MBB);
+}
+
+void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {}
+
+MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
+ Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+ MachineFunction &MF, bool IsTailCall) const {
+
+ // Are we tail calling?
+ if (IsTailCall) {
+ // If yes, then we can just branch to the label.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(AArch64::B))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ return It;
+ }
+
+ // We're not tail calling, so we have to save LR before the call and restore
+ // it after.
+ MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ It = MBB.insert(It, STRXpre);
+ It++;
+
+ // Insert the call.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(AArch64::BL))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+
+ It++;
+
+ // Restore the link register.
+ MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ It = MBB.insert(It, LDRXpost);
+
+ return It;
+}
+
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 5037866925d3..bacce441f6c5 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -133,12 +133,19 @@ public:
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
+ /// Return the immediate offset of the base register in a load/store \p LdSt.
+ MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
+
+ /// \brief Returns true if opcode \p Opc is a memory operation. If it is, set
+ /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
+ ///
+ /// For unscaled instructions, \p Scale is set to 1.
+ bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
+ int64_t &MinOffset, int64_t &MaxOffset) const;
+
bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(const MachineInstr &First,
- const MachineInstr &Second) const override;
-
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *Var,
const MDNode *Expr,
@@ -245,7 +252,33 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
+ unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
+ bool CanBeTailCall) const override;
+ AArch64GenInstrInfo::MachineOutlinerInstrType
+ getOutliningType(MachineInstr &MI) const override;
+ void insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+ void insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool isTailCall) const override;
+ MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+ /// Returns true if the instruction has a shift by immediate that can be
+ /// executed in one cycle less.
+ bool isFalkorLSLFast(const MachineInstr &MI) const;
private:
+
+ /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
+ /// so that they will be valid post-outlining.
+ ///
+ /// \param MBB A \p MachineBasicBlock in an outlined function.
+ void fixupPostOutline(MachineBasicBlock &MBB) const;
+
void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
MachineBasicBlock *TBB,
ArrayRef<MachineOperand> Cond) const;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 2244baacca17..4449412532f3 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -30,6 +30,8 @@ def HasLSE : Predicate<"Subtarget->hasLSE()">,
AssemblerPredicate<"FeatureLSE", "lse">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
AssemblerPredicate<"FeatureRAS", "ras">;
+def HasRDM : Predicate<"Subtarget->hasRDM()">,
+ AssemblerPredicate<"FeatureRDM", "rdm">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
@@ -41,6 +43,11 @@ def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
+def UseNegativeImmediates
+ : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
+
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
@@ -424,8 +431,10 @@ def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;
// The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
+let hasSideEffects = 0 in
+def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
+ [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
@@ -574,31 +583,31 @@ def : Pat<(f64 fpimm:$in),
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
tglobaladdr:$g1, tglobaladdr:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
- tglobaladdr:$g2, 32),
- tglobaladdr:$g1, 16),
- tglobaladdr:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
+ tglobaladdr:$g1, 16),
+ tglobaladdr:$g2, 32),
+ tglobaladdr:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
tblockaddress:$g1, tblockaddress:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
- tblockaddress:$g2, 32),
- tblockaddress:$g1, 16),
- tblockaddress:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
+ tblockaddress:$g1, 16),
+ tblockaddress:$g2, 32),
+ tblockaddress:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
tconstpool:$g1, tconstpool:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
- tconstpool:$g2, 32),
- tconstpool:$g1, 16),
- tconstpool:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
+ tconstpool:$g1, 16),
+ tconstpool:$g2, 32),
+ tconstpool:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
tjumptable:$g1, tjumptable:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
- tjumptable:$g2, 32),
- tjumptable:$g1, 16),
- tjumptable:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
+ tjumptable:$g1, 16),
+ tjumptable:$g2, 32),
+ tjumptable:$g3, 48)>;
//===----------------------------------------------------------------------===//
@@ -3284,7 +3293,7 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
-let Predicates = [HasV8_1a] in {
+let Predicates = [HasRDM] in {
defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
def : Pat<(i32 (int_aarch64_neon_sqadd
@@ -5029,7 +5038,7 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
0),
dsub)))>,
Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
-
+
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index b51473524c72..878dac6bff1e 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -12,17 +12,19 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
-#include "AArch64InstructionSelector.h"
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
@@ -36,13 +38,61 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
+namespace {
+
+class AArch64InstructionSelector : public InstructionSelector {
+public:
+ AArch64InstructionSelector(const AArch64TargetMachine &TM,
+ const AArch64Subtarget &STI,
+ const AArch64RegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+
+private:
+ /// tblgen-erated 'select' implementation, used as the initial selector for
+ /// the patterns that don't require complex C++.
+ bool selectImpl(MachineInstr &I) const;
+
+ bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+ bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+
+ bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+
+ bool selectArithImmed(MachineOperand &Root, MachineOperand &Result1,
+ MachineOperand &Result2) const;
+
+ const AArch64TargetMachine &TM;
+ const AArch64Subtarget &STI;
+ const AArch64InstrInfo &TII;
+ const AArch64RegisterInfo &TRI;
+ const AArch64RegisterBankInfo &RBI;
+
+// We declare the temporaries used by selectImpl() in the class to minimize the
+// cost of constructing placeholder values.
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
- : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+ : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI)
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
@@ -119,67 +169,34 @@ static bool unsupportedBinOp(const MachineInstr &I,
}
/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
-/// (such as G_OR or G_ADD), appropriate for the register bank \p RegBankID
+/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
switch (RegBankID) {
case AArch64::GPRRegBankID:
- if (OpSize <= 32) {
- assert((OpSize == 32 || (GenericOpc != TargetOpcode::G_SDIV &&
- GenericOpc != TargetOpcode::G_UDIV &&
- GenericOpc != TargetOpcode::G_LSHR &&
- GenericOpc != TargetOpcode::G_ASHR)) &&
- "operation should have been legalized before now");
-
+ if (OpSize == 32) {
switch (GenericOpc) {
- case TargetOpcode::G_OR:
- return AArch64::ORRWrr;
- case TargetOpcode::G_XOR:
- return AArch64::EORWrr;
- case TargetOpcode::G_AND:
- return AArch64::ANDWrr;
- case TargetOpcode::G_ADD:
- assert(OpSize != 32 && "s32 G_ADD should have been selected");
- return AArch64::ADDWrr;
- case TargetOpcode::G_SUB:
- return AArch64::SUBWrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVWr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVWr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVWr;
- case TargetOpcode::G_SDIV:
- return AArch64::SDIVWr;
- case TargetOpcode::G_UDIV:
- return AArch64::UDIVWr;
default:
return GenericOpc;
}
} else if (OpSize == 64) {
switch (GenericOpc) {
- case TargetOpcode::G_OR:
- return AArch64::ORRXrr;
- case TargetOpcode::G_XOR:
- return AArch64::EORXrr;
- case TargetOpcode::G_AND:
- return AArch64::ANDXrr;
case TargetOpcode::G_GEP:
return AArch64::ADDXrr;
- case TargetOpcode::G_SUB:
- return AArch64::SUBXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVXr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVXr;
- case TargetOpcode::G_SDIV:
- return AArch64::SDIVXr;
- case TargetOpcode::G_UDIV:
- return AArch64::UDIVXr;
default:
return GenericOpc;
}
@@ -473,6 +490,82 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
}
}
+bool AArch64InstructionSelector::selectCompareBranch(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+
+ const unsigned CondReg = I.getOperand(0).getReg();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
+ return false;
+
+ unsigned LHS = CCMI->getOperand(2).getReg();
+ unsigned RHS = CCMI->getOperand(3).getReg();
+ if (!getConstantVRegVal(RHS, MRI))
+ std::swap(RHS, LHS);
+
+ const auto RHSImm = getConstantVRegVal(RHS, MRI);
+ if (!RHSImm || *RHSImm != 0)
+ return false;
+
+ const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
+ if (RB.getID() != AArch64::GPRRegBankID)
+ return false;
+
+ const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+ if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
+ return false;
+
+ const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
+ unsigned CBOpc = 0;
+ if (CmpWidth <= 32)
+ CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
+ else if (CmpWidth == 64)
+ CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
+ else
+ return false;
+
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
+ .addUse(LHS)
+ .addMBB(DestMBB);
+
+ constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectVaStartAAPCS(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ return false;
+}
+
+bool AArch64InstructionSelector::selectVaStartDarwin(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ unsigned ListReg = I.getOperand(0).getReg();
+
+ unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ auto MIB =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
+ .addDef(ArgsAddrReg)
+ .addFrameIndex(FuncInfo->getVarArgsStackIndex())
+ .addImm(0)
+ .addImm(0);
+
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
+ .addUse(ArgsAddrReg)
+ .addUse(ListReg)
+ .addImm(0)
+ .addMemOperand(*I.memoperands_begin());
+
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -549,6 +642,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ if (selectCompareBranch(I, MF, MRI))
+ return true;
+
auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
.addUse(CondReg)
.addImm(/*bit offset=*/0)
@@ -558,6 +654,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
}
+ case TargetOpcode::G_BRINDIRECT: {
+ I.setDesc(TII.get(AArch64::BR));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
@@ -629,9 +730,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
// FIXME: Is going through int64_t always correct?
ImmOp.ChangeToImmediate(
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- } else {
+ } else if (I.getOperand(1).isCImm()) {
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
I.getOperand(1).ChangeToImmediate(Val);
+ } else if (I.getOperand(1).isImm()) {
+ uint64_t Val = I.getOperand(1).getImm();
+ I.getOperand(1).ChangeToImmediate(Val);
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -686,10 +790,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
-#ifndef NDEBUG
- // Sanity-check the pointer register.
+ auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
const unsigned PtrReg = I.getOperand(1).getReg();
+#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
+ // Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
@@ -706,11 +816,46 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateImm(0));
+ uint64_t Offset = 0;
+ auto *PtrMI = MRI.getVRegDef(PtrReg);
+
+ // Try to fold a GEP into our unsigned immediate addressing mode.
+ if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
+ if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
+ int64_t Imm = *COff;
+ const unsigned Size = MemTy.getSizeInBits() / 8;
+ const unsigned Scale = Log2_32(Size);
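+        // The unsigned-offset LDR/STR forms take a 12-bit immediate scaled by
+        // the access size, hence the alignment check and the 0x1000 << Scale
+        // bound on the byte offset.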
+ if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
+ unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
+ I.getOperand(1).setReg(Ptr2Reg);
+ PtrMI = MRI.getVRegDef(Ptr2Reg);
+ Offset = Imm / Size;
+ }
+ }
+ }
+
+ // If we haven't folded anything into our addressing mode yet, try to fold
+ // a frame index into the base+offset.
+ if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+
+ I.addOperand(MachineOperand::CreateImm(Offset));
+
+ // If we're storing a 0, use WZR/XZR.
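+  // Sourcing the store from WZR/XZR avoids materializing the zero in a
+  // separate register.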
+ if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
+ if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
+ if (I.getOpcode() == AArch64::STRWui)
+ I.getOperand(0).setReg(AArch64::WZR);
+ else if (I.getOpcode() == AArch64::STRXui)
+ I.getOperand(0).setReg(AArch64::XZR);
+ }
+ }
+
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_MUL: {
+ case TargetOpcode::G_SMULH:
+ case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -719,48 +864,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
- DEBUG(dbgs() << "G_MUL on bank: " << RB << ", expected: GPR\n");
+ DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
return false;
}
- unsigned ZeroReg;
- unsigned NewOpc;
- if (Ty.isScalar() && Ty.getSizeInBits() <= 32) {
- NewOpc = AArch64::MADDWrrr;
- ZeroReg = AArch64::WZR;
- } else if (Ty == LLT::scalar(64)) {
- NewOpc = AArch64::MADDXrrr;
- ZeroReg = AArch64::XZR;
- } else {
- DEBUG(dbgs() << "G_MUL has type: " << Ty << ", expected: "
- << LLT::scalar(32) << " or " << LLT::scalar(64) << '\n');
+ if (Ty != LLT::scalar(64)) {
+ DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
+ << ", expected: " << LLT::scalar(64) << '\n');
return false;
}
+ unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
+ : AArch64::UMULHrr;
I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateReg(ZeroReg, /*isDef=*/false));
-
// Now that we selected an opcode, we need to constrain the register
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
-
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_OR:
- case TargetOpcode::G_XOR:
- case TargetOpcode::G_AND:
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
- case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV:
- case TargetOpcode::G_ADD:
- case TargetOpcode::G_SUB:
case TargetOpcode::G_GEP: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -783,6 +913,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ case TargetOpcode::G_PTR_MASK: {
+ uint64_t Align = I.getOperand(2).getImm();
+ if (Align >= 64 || Align == 0)
+ return false;
+
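+    // Clearing the low Align bits is an AND with a mask that is all-ones from
+    // bit Align upward; such a contiguous run of ones is always encodable as
+    // an AArch64 logical immediate.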
+ uint64_t Mask = ~((1ULL << Align) - 1);
+ I.setDesc(TII.get(AArch64::ANDXri));
+ I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
case TargetOpcode::G_PTRTOINT:
case TargetOpcode::G_TRUNC: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
@@ -1026,7 +1167,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
if (Ty == LLT::scalar(32)) {
CSelOpc = AArch64::CSELWr;
- } else if (Ty == LLT::scalar(64)) {
+ } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
CSelOpc = AArch64::CSELXr;
} else {
return false;
@@ -1134,7 +1275,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(Def1Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC1);
+ .addImm(getInvertedCondCode(CC1));
if (CC2 != AArch64CC::AL) {
unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
@@ -1143,7 +1284,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(Def2Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC2);
+ .addImm(getInvertedCondCode(CC2));
MachineInstr &OrMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
.addDef(DefReg)
@@ -1159,7 +1300,69 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
I.eraseFromParent();
return true;
}
+ case TargetOpcode::G_VASTART:
+ return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
+ : selectVaStartAAPCS(I, MF, MRI);
}
return false;
}
+
+/// selectArithImmed - Select an immediate value that can be represented as
+/// a 12-bit value shifted left by either 0 or 12. If so, return true with
+/// Result1 set to the 12-bit value and Result2 set to the shifter operand.
+bool AArch64InstructionSelector::selectArithImmed(
+ MachineOperand &Root, MachineOperand &Result1,
+ MachineOperand &Result2) const {
+ MachineInstr &MI = *Root.getParent();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // This function is called from the addsub_shifted_imm ComplexPattern,
+  // which lists [imm] as the list of opcodes it's interested in. However,
+ // we still need to check whether the operand is actually an immediate
+ // here because the ComplexPattern opcode list is only used in
+ // root-level opcode matching.
+ uint64_t Immed;
+ if (Root.isImm())
+ Immed = Root.getImm();
+ else if (Root.isCImm())
+ Immed = Root.getCImm()->getZExtValue();
+ else if (Root.isReg()) {
+ MachineInstr *Def = MRI.getVRegDef(Root.getReg());
+ if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+ MachineOperand &Op1 = Def->getOperand(1);
+ if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
+ return false;
+ Immed = Op1.getCImm()->getZExtValue();
+ } else
+ return false;
+
+ unsigned ShiftAmt;
+
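+  // AArch64 arithmetic immediates are 12 bits wide, optionally shifted left
+  // by 12, so accept the value if it fits either encoding.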
+ if (Immed >> 12 == 0) {
+ ShiftAmt = 0;
+ } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
+ ShiftAmt = 12;
+ Immed = Immed >> 12;
+ } else
+ return false;
+
+ unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
+ Result1.ChangeToImmediate(Immed);
+ Result1.clearParent();
+ Result2.ChangeToImmediate(ShVal);
+ Result2.clearParent();
+ return true;
+}
+
+namespace llvm {
+InstructionSelector *
+createAArch64InstructionSelector(const AArch64TargetMachine &TM,
+ AArch64Subtarget &Subtarget,
+ AArch64RegisterBankInfo &RBI) {
+ return new AArch64InstructionSelector(TM, Subtarget, RBI);
+}
+}
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.h b/lib/Target/AArch64/AArch64InstructionSelector.h
deleted file mode 100644
index 2c6e5a912fb7..000000000000
--- a/lib/Target/AArch64/AArch64InstructionSelector.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===- AArch64InstructionSelector --------------------------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares the targeting of the InstructionSelector class for
-/// AArch64.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
-
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-
-namespace llvm {
-
-class AArch64InstrInfo;
-class AArch64RegisterBankInfo;
-class AArch64RegisterInfo;
-class AArch64Subtarget;
-class AArch64TargetMachine;
-
-class AArch64InstructionSelector : public InstructionSelector {
-public:
- AArch64InstructionSelector(const AArch64TargetMachine &TM,
- const AArch64Subtarget &STI,
- const AArch64RegisterBankInfo &RBI);
-
- bool select(MachineInstr &I) const override;
-
-private:
- /// tblgen-erated 'select' implementation, used as the initial selector for
- /// the patterns that don't require complex C++.
- bool selectImpl(MachineInstr &I) const;
-
- const AArch64TargetMachine &TM;
- const AArch64Subtarget &STI;
- const AArch64InstrInfo &TII;
- const AArch64RegisterInfo &TRI;
- const AArch64RegisterBankInfo &RBI;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 83f276a8161b..6e6daf812295 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,7 +13,10 @@
//===----------------------------------------------------------------------===//
#include "AArch64LegalizerInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Target/TargetOpcodes.h"
@@ -36,11 +39,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
- for (auto BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
// These operations naturally get the right answer when used on
// GPR32, even if the actual type is narrower.
- for (auto Ty : {s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
+ for (auto Ty : {s32, s64, v2s32, v4s32, v2s64})
setAction({BinOp, Ty}, Legal);
+
+ for (auto Ty : {s1, s8, s16})
+ setAction({BinOp, Ty}, WidenScalar);
}
setAction({G_GEP, p0}, Legal);
@@ -49,7 +55,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s1, s8, s16, s32})
setAction({G_GEP, 1, Ty}, WidenScalar);
- for (auto BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
+ setAction({G_PTR_MASK, p0}, Legal);
+
+ for (unsigned BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
@@ -57,25 +65,41 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({BinOp, Ty}, WidenScalar);
}
- for (auto BinOp : { G_SREM, G_UREM })
+ for (unsigned BinOp : {G_SREM, G_UREM})
for (auto Ty : { s1, s8, s16, s32, s64 })
setAction({BinOp, Ty}, Lower);
- for (auto Op : { G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULO, G_UMULO }) {
+ for (unsigned Op : {G_SMULO, G_UMULO})
+ setAction({Op, s64}, Lower);
+
+ for (unsigned Op : {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULH, G_UMULH}) {
for (auto Ty : { s32, s64 })
setAction({Op, Ty}, Legal);
setAction({Op, 1, s1}, Legal);
}
- for (auto BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
- setAction({G_FREM, s32}, Libcall);
- setAction({G_FREM, s64}, Libcall);
+ for (unsigned BinOp : {G_FREM, G_FPOW}) {
+ setAction({BinOp, s32}, Libcall);
+ setAction({BinOp, s64}, Libcall);
+ }
- for (auto MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s32, s64, p0}) {
+ setAction({G_INSERT, Ty}, Legal);
+ setAction({G_INSERT, 1, Ty}, Legal);
+ }
+ for (auto Ty : {s1, s8, s16}) {
+ setAction({G_INSERT, Ty}, WidenScalar);
+ setAction({G_INSERT, 1, Ty}, Legal);
+ // FIXME: Can't widen the sources because that violates the constraints on
+ // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
+ }
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
@@ -141,12 +165,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_TRUNC, 1, Ty}, Legal);
// Conversions
- for (auto Ty : { s1, s8, s16, s32, s64 }) {
+ for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 0, Ty}, Legal);
setAction({G_FPTOUI, 0, Ty}, Legal);
setAction({G_SITOFP, 1, Ty}, Legal);
setAction({G_UITOFP, 1, Ty}, Legal);
}
+ for (auto Ty : { s1, s8, s16 }) {
+ setAction({G_FPTOSI, 0, Ty}, WidenScalar);
+ setAction({G_FPTOUI, 0, Ty}, WidenScalar);
+ setAction({G_SITOFP, 1, Ty}, WidenScalar);
+ setAction({G_UITOFP, 1, Ty}, WidenScalar);
+ }
for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 1, Ty}, Legal);
@@ -158,9 +188,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
// Control-flow
for (auto Ty : {s1, s8, s16, s32})
setAction({G_BRCOND, Ty}, Legal);
+ setAction({G_BRINDIRECT, p0}, Legal);
// Select
- for (auto Ty : {s1, s8, s16, s32, s64})
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_SELECT, Ty}, WidenScalar);
+
+ for (auto Ty : {s32, s64, p0})
setAction({G_SELECT, Ty}, Legal);
setAction({G_SELECT, 1, s1}, Legal);
@@ -200,5 +234,82 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_BITCAST, 1, LLT::vector(32/EltSize, EltSize)}, Legal);
}
+ setAction({G_VASTART, p0}, Legal);
+
+ // va_list must be a pointer, but most sized types are pretty easy to handle
+ // as the destination.
+ setAction({G_VAARG, 1, p0}, Legal);
+
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({G_VAARG, Ty}, Custom);
+
computeTables();
}
+
+bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (MI.getOpcode()) {
+ default:
+ // No idea what to do.
+ return false;
+ case TargetOpcode::G_VAARG:
+ return legalizeVaArg(MI, MRI, MIRBuilder);
+ }
+
+ llvm_unreachable("expected switch to return");
+}
+
+bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ MIRBuilder.setInstr(MI);
+ MachineFunction &MF = MIRBuilder.getMF();
+ unsigned Align = MI.getOperand(2).getImm();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned ListPtr = MI.getOperand(1).getReg();
+
+ LLT PtrTy = MRI.getType(ListPtr);
+ LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
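+  // The va_list is treated as a plain pointer into the argument area: load
+  // the current pointer, realign it if the requested alignment exceeds the
+  // pointer size, load the value, then advance the pointer past the
+  // pointer-size-aligned slot and store it back.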
+ const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
+ unsigned List = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildLoad(
+ List, ListPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
+ PtrSize, /* Align = */ PtrSize));
+
+ unsigned DstPtr;
+ if (Align > PtrSize) {
+ // Realign the list to the actual required alignment.
+ unsigned AlignMinus1 = MRI.createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildConstant(AlignMinus1, Align - 1);
+
+ unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(ListTmp, List, AlignMinus1);
+
+ DstPtr = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
+ } else
+ DstPtr = List;
+
+ uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
+ MIRBuilder.buildLoad(
+ Dst, DstPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
+ ValSize, std::max(Align, PtrSize)));
+
+ unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));
+
+ unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);
+
+ MIRBuilder.buildStore(
+ NewList, ListPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
+ PtrSize, /* Align = */ PtrSize));
+
+ MI.eraseFromParent();
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h
index feacbef9f147..42d4ac130c5c 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -25,6 +25,13 @@ class LLVMContext;
class AArch64LegalizerInfo : public LegalizerInfo {
public:
AArch64LegalizerInfo();
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
+
+private:
+ bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8e312dcf276f..976498aa70d6 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -16,19 +16,29 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+
using namespace llvm;
#define DEBUG_TYPE "aarch64-ldst-opt"
@@ -58,15 +68,15 @@ typedef struct LdStPairFlags {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
- bool MergeForward;
+ bool MergeForward = false;
// SExtIdx gives the index of the result of the load pair that must be
// extended. The value of SExtIdx assumes that the paired load produces the
// value in this order: (I, returned iterator), i.e., -1 means no value has
// to be extended, 0 means I, and 1 means the returned iterator.
- int SExtIdx;
+ int SExtIdx = -1;
- LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}
+ LdStPairFlags() = default;
void setMergeForward(bool V = true) { MergeForward = V; }
bool getMergeForward() const { return MergeForward; }
@@ -78,10 +88,12 @@ typedef struct LdStPairFlags {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
+
AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
}
+ AliasAnalysis *AA;
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const AArch64Subtarget *Subtarget;
@@ -89,6 +101,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which registers have been modified and used.
BitVector ModifiedRegs, UsedRegs;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
@@ -162,8 +179,10 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};
+
char AArch64LoadStoreOpt::ID = 0;
-} // namespace
+
+} // end anonymous namespace
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
AARCH64_LOAD_STORE_OPT_NAME, false, false)
@@ -246,7 +265,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
default:
if (IsValidLdStrOpc)
*IsValidLdStrOpc = false;
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
case AArch64::STRDui:
case AArch64::STURDi:
case AArch64::STRQui:
@@ -595,7 +614,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
MachineInstrBuilder MIB;
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
- .addOperand(BaseRegOp)
+ .add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
(void)MIB;
@@ -709,9 +728,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
}
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
- .addOperand(RegOp0)
- .addOperand(RegOp1)
- .addOperand(BaseRegOp)
+ .add(RegOp0)
+ .add(RegOp1)
+ .add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
@@ -923,7 +942,7 @@ static int alignTo(int Num, int PowOf2) {
}
static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
- const AArch64InstrInfo *TII) {
+ AliasAnalysis *AA) {
// One of the instructions must modify memory.
if (!MIa.mayStore() && !MIb.mayStore())
return false;
@@ -932,14 +951,14 @@ static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
return false;
- return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
+ return MIa.mayAlias(AA, MIb, /*UseTBAA*/false);
}
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
- const AArch64InstrInfo *TII) {
+ AliasAnalysis *AA) {
for (MachineInstr *MIb : MemInsns)
- if (mayAlias(MIa, *MIb, TII))
+ if (mayAlias(MIa, *MIb, AA))
return true;
return false;
@@ -997,7 +1016,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
return false;
// If we encounter a store aliased with the load, return early.
- if (MI.mayStore() && mayAlias(LoadMI, MI, TII))
+ if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
return false;
} while (MBBI != B && Count < Limit);
return false;
@@ -1167,7 +1186,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// first.
if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
!(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
- !mayAlias(MI, MemInsns, TII)) {
+ !mayAlias(MI, MemInsns, AA)) {
Flags.setMergeForward(false);
return MBBI;
}
@@ -1178,7 +1197,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// into the second.
if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
!(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
- !mayAlias(FirstMI, MemInsns, TII)) {
+ !mayAlias(FirstMI, MemInsns, AA)) {
Flags.setMergeForward(true);
return MBBI;
}
@@ -1233,19 +1252,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
if (!isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(*Update))
- .addOperand(getLdStRegOp(*I))
- .addOperand(getLdStBaseOp(*I))
+ .add(getLdStRegOp(*Update))
+ .add(getLdStRegOp(*I))
+ .add(getLdStBaseOp(*I))
.addImm(Value)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
} else {
// Paired instruction.
int Scale = getMemScale(*I);
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(*Update))
- .addOperand(getLdStRegOp(*I, 0))
- .addOperand(getLdStRegOp(*I, 1))
- .addOperand(getLdStBaseOp(*I))
+ .add(getLdStRegOp(*Update))
+ .add(getLdStRegOp(*I, 0))
+ .add(getLdStRegOp(*I, 1))
+ .add(getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
}
@@ -1545,7 +1564,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
case AArch64::LDURBBi:
case AArch64::LDURHHi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
if (tryToPromoteLoadFromStore(MBBI)) {
Modified = true;
break;
@@ -1553,7 +1572,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
break;
}
- }
}
// 2) Merge adjacent zero stores into a wider store.
// e.g.,
@@ -1722,6 +1740,7 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
TRI = Subtarget->getRegisterInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
// Resize the modified and used register bitfield trackers. We do this once
// per function and then clear the bitfield each time we optimize a load or
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
new file mode 100644
index 000000000000..a6926a6700e1
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -0,0 +1,272 @@
+//===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file This file contains the AArch64 implementation of the DAG scheduling
+// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MacroFusion.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+namespace {
+
+/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused
+/// together. When one of the two instrs is unspecified, check whether the
+/// specified instr may be part of any fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr *SecondMI) {
+ assert((FirstMI || SecondMI) && "At least one instr must be specified");
+
+ const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);
+ const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
+
+ // Assume wildcards for unspecified instrs.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode =
+ SecondMI ? SecondMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
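+  // INSTRUCTION_LIST_END stands in for the unspecified instr, so a lone instr
+  // is accepted if it could form either half of some fusible pair.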
+
+ if (ST.hasArithmeticBccFusion())
+ // Fuse CMN, CMP, TST followed by Bcc.
+ if (SecondOpcode == AArch64::Bcc)
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case AArch64::ADDSWri:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !II.hasShiftedReg(*FirstMI);
+ case AArch64::INSTRUCTION_LIST_END:
+ return true;
+ }
+
+ if (ST.hasArithmeticCbzFusion())
+ // Fuse ALU operations followed by CBZ/CBNZ.
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
+ SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case AArch64::ADDWri:
+ case AArch64::ADDWrr:
+ case AArch64::ADDXri:
+ case AArch64::ADDXrr:
+ case AArch64::ANDWri:
+ case AArch64::ANDWrr:
+ case AArch64::ANDXri:
+ case AArch64::ANDXrr:
+ case AArch64::EORWri:
+ case AArch64::EORWrr:
+ case AArch64::EORXri:
+ case AArch64::EORXrr:
+ case AArch64::ORRWri:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXri:
+ case AArch64::ORRXrr:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !II.hasShiftedReg(*FirstMI);
+ case AArch64::INSTRUCTION_LIST_END:
+ return true;
+ }
+
+ if (ST.hasFuseAES())
+ // Fuse AES crypto operations.
+ switch(FirstOpcode) {
+ // AES encode.
+ case AArch64::AESErr:
+ return SecondOpcode == AArch64::AESMCrr ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // AES decode.
+ case AArch64::AESDrr:
+ return SecondOpcode == AArch64::AESIMCrr ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ }
+
+ if (ST.hasFuseLiterals())
+ // Fuse literal generation operations.
+ switch (FirstOpcode) {
+ // PC relative address.
+ case AArch64::ADRP:
+ return SecondOpcode == AArch64::ADDXri ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // 32 bit immediate.
+ case AArch64::MOVZWi:
+ return (SecondOpcode == AArch64::MOVKWi &&
+ SecondMI->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Lower half of 64 bit immediate.
+ case AArch64::MOVZXi:
+ return (SecondOpcode == AArch64::MOVKXi &&
+ SecondMI->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Upper half of 64 bit immediate.
+ case AArch64::MOVKXi:
+ return FirstMI->getOperand(3).getImm() == 32 &&
+ ((SecondOpcode == AArch64::MOVKXi &&
+ SecondMI->getOperand(3).getImm() == 48) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+ }
+
+ return false;
+}
+
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
+ const MachineInstr *AnchorMI = AnchorSU.getInstr();
+ if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient())
+ return false;
+
+ // If the anchor instr is the ExitSU, then consider its predecessors;
+ // otherwise, its successors.
+ bool Preds = (&AnchorSU == &DAG->ExitSU);
+ SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs;
+
+ const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI;
+ const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr;
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ return false;
+
+  // Explore fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorDeps) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.isWeak())
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ // Ignore the ExitSU if the dependents are successors.
+ if (!Preds && &DepSU == &DAG->ExitSU)
+ continue;
+
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!DepMI || DepMI->isPseudo() || DepMI->isTransient())
+ continue;
+
+ FirstMI = Preds ? DepMI : AnchorMI;
+ SecondMI = Preds ? AnchorMI : DepMI;
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ continue;
+
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ SUnit &FirstSU = Preds ? DepSU : AnchorSU;
+ SUnit &SecondSU = Preds ? AnchorSU : DepSU;
+ DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+
+ // Adjust the latency between the anchor instr and its
+ // predecessors/successors.
+ for (SDep &IDep : AnchorDeps)
+ if (IDep.getSUnit() == &DepSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between the dependent instr and its
+ // successors/predecessors.
+ for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds)
+ if (IDep.getSUnit() == &AnchorSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), DAG); dbgs() << " / ";
+ dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
+ DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
+
+ ++NumFused;
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class AArch64MacroFusion : public ScheduleDAGMutation {
+public:
+ AArch64MacroFusion() {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ // For each of the SUnits in the scheduling block, try to fuse the instr in it
+ // with one in its successors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(DAG, ISU);
+
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(DAG, DAG->ExitSU);
+}
+
+} // end anonymous namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation() {
+ return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
+}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64MacroFusion.h b/lib/Target/AArch64/AArch64MacroFusion.h
new file mode 100644
index 000000000000..e5efedd9fbfd
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MacroFusion.h
@@ -0,0 +1,29 @@
+//===- AArch64MacroFusion.h - AArch64 Macro Fusion ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file This file contains the AArch64 definition of the DAG scheduling
+// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+
+//===----------------------------------------------------------------------===//
+// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createAArch64MacroFusionDAGMutation());
+/// to AArch64PassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation();
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 8f45e6a80a36..f3c8e7e9bdc2 100644
--- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -12,13 +12,14 @@
// CBZW %W0, <BB#2>
// BB#2:
// %W0 = COPY %WZR
-// This pass should be run after register allocation.
+// Similarly, this pass also handles non-zero copies.
+// BB#0:
+// cmp x0, #1
+// b.eq .LBB0_1
+// .LBB0_1:
+// orr x0, xzr, #0x1
//
-// FIXME: This should be extended to handle any constant other than zero. E.g.,
-// cmp w0, #1
-// b.eq .BB1
-// BB1:
-// mov w0, #1
+// This pass should be run after register allocation.
//
// FIXME: This could also be extended to check the whole dominance subtree below
// the comparison if the compile time regression is acceptable.
@@ -26,6 +27,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -43,6 +45,7 @@ namespace {
class AArch64RedundantCopyElimination : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
+ BitVector ClobberedRegs;
public:
static char ID;
@@ -50,6 +53,16 @@ public:
initializeAArch64RedundantCopyEliminationPass(
*PassRegistry::getPassRegistry());
}
+
+ struct RegImm {
+ MCPhysReg Reg;
+ int32_t Imm;
+ RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {}
+ };
+
+ Optional<RegImm> knownRegValInBlock(MachineInstr &CondBr,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &FirstUse);
bool optimizeCopy(MachineBasicBlock *MBB);
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -66,18 +79,120 @@ char AArch64RedundantCopyElimination::ID = 0;
INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
"AArch64 redundant copy elimination pass", false, false)
-static bool guaranteesZeroRegInBlock(MachineInstr &MI, MachineBasicBlock *MBB) {
- unsigned Opc = MI.getOpcode();
+/// Remember what registers the specified instruction modifies.
+static void trackRegDefs(const MachineInstr &MI, BitVector &ClobberedRegs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ ClobberedRegs.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!MO.isDef())
+ continue;
+
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ ClobberedRegs.set(*AI);
+ }
+}
+
+/// It's possible to determine the value of a register based on a dominating
+/// condition. To do so, this function checks whether the basic block \p MBB
+/// is the target of a conditional branch \p CondBr whose controlling equality
+/// comparison is against a constant. If so, return a known physical register
+/// and constant value pair. Otherwise, return None.
+Optional<AArch64RedundantCopyElimination::RegImm>
+AArch64RedundantCopyElimination::knownRegValInBlock(
+ MachineInstr &CondBr, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &FirstUse) {
+ unsigned Opc = CondBr.getOpcode();
+
// Check if the current basic block is the target block to which the
// CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
- if ((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
- MBB == MI.getOperand(1).getMBB())
- return true;
- else if ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
- MBB != MI.getOperand(1).getMBB())
- return true;
-
- return false;
+ if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
+ MBB == CondBr.getOperand(1).getMBB()) ||
+ ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
+ MBB != CondBr.getOperand(1).getMBB())) {
+ FirstUse = CondBr;
+ return RegImm(CondBr.getOperand(0).getReg(), 0);
+ }
+
+ // Otherwise, must be a conditional branch.
+ if (Opc != AArch64::Bcc)
+ return None;
+
+ // Must be an equality check (i.e., == or !=).
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm();
+ if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+ return None;
+
+ MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB();
+ if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
+ (CC == AArch64CC::NE && BrTarget == MBB))
+ return None;
+
+ // Stop if we get to the beginning of PredMBB.
+ MachineBasicBlock *PredMBB = *MBB->pred_begin();
+ assert(PredMBB == CondBr.getParent() &&
+ "Conditional branch not in predecessor block!");
+ if (CondBr == PredMBB->begin())
+ return None;
+
+  // Registers clobbered in PredMBB between the CondBr instruction and the
+  // instruction currently being checked in the loop below.
+ ClobberedRegs.reset();
+
+ // Find compare instruction that sets NZCV used by CondBr.
+ MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator();
+ for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) {
+
+ // Track clobbered registers.
+ trackRegDefs(PredI, ClobberedRegs, TRI);
+
+ bool IsCMN = false;
+ switch (PredI.getOpcode()) {
+ default:
+ break;
+
+ // CMN is an alias for ADDS with a dead destination register.
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ IsCMN = true;
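+    // Fall through to the SUBS handling below; CMN only differs in the sign
+    // of the immediate.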
+ // CMP is an alias for SUBS with a dead destination register.
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri: {
+ MCPhysReg SrcReg = PredI.getOperand(1).getReg();
+
+ // Must not be a symbolic immediate.
+ if (!PredI.getOperand(2).isImm())
+ return None;
+
+ // The src register must not be modified between the cmp and conditional
+ // branch. This includes a self-clobbering compare.
+ if (ClobberedRegs[SrcReg])
+ return None;
+
+ // We've found the Cmp that sets NZCV.
+ int32_t KnownImm = PredI.getOperand(2).getImm();
+ int32_t Shift = PredI.getOperand(3).getImm();
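+      // The compare immediate may use the LSL #12 encoding; undo the shift to
+      // recover the value being compared against (and negate it for CMN).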
+ KnownImm <<= Shift;
+ if (IsCMN)
+ KnownImm = -KnownImm;
+ FirstUse = PredI;
+ return RegImm(SrcReg, KnownImm);
+ }
+ }
+
+ // Bail if we see an instruction that defines NZCV that we don't handle.
+ if (PredI.definesRegister(AArch64::NZCV))
+ return None;
+ }
+ return None;
}
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
@@ -85,79 +200,187 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if (MBB->pred_size() != 1)
return false;
+ // Check if the predecessor has two successors, implying the block ends in a
+ // conditional branch.
MachineBasicBlock *PredMBB = *MBB->pred_begin();
- MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
- if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
+ if (PredMBB->succ_size() != 2)
+ return false;
+
+ MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr();
+ if (CondBr == PredMBB->end())
return false;
- ++CompBr;
+ // Keep track of the earliest point in the PredMBB block where kill markers
+ // need to be removed if a COPY is removed.
+ MachineBasicBlock::iterator FirstUse;
+ // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ
+ // or a compare (i.e., SUBS). In the latter case, we must take care when
+ // updating FirstUse when scanning for COPY instructions. In particular, if
+ // there's a COPY in between the compare and branch the COPY should not
+ // update FirstUse.
+ bool SeenFirstUse = false;
+ // Registers that contain a known value at the start of MBB.
+ SmallVector<RegImm, 4> KnownRegs;
+
+ MachineBasicBlock::iterator Itr = std::next(CondBr);
do {
- --CompBr;
- if (guaranteesZeroRegInBlock(*CompBr, MBB))
- break;
- } while (CompBr != PredMBB->begin() && CompBr->isTerminator());
+ --Itr;
- // We've not found a CBZ/CBNZ, time to bail out.
- if (!guaranteesZeroRegInBlock(*CompBr, MBB))
- return false;
+ Optional<RegImm> KnownRegImm = knownRegValInBlock(*Itr, MBB, FirstUse);
+ if (KnownRegImm == None)
+ continue;
- unsigned TargetReg = CompBr->getOperand(0).getReg();
- if (!TargetReg)
- return false;
- assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
- "Expect physical register");
+ KnownRegs.push_back(*KnownRegImm);
+
+ // Reset the clobber list, which is used by knownRegValInBlock.
+ ClobberedRegs.reset();
+
+ // Look backward in PredMBB for COPYs from the known reg to find other
+ // registers that are known to be a constant value.
+ for (auto PredI = Itr;; --PredI) {
+ if (FirstUse == PredI)
+ SeenFirstUse = true;
+
+ if (PredI->isCopy()) {
+ MCPhysReg CopyDstReg = PredI->getOperand(0).getReg();
+ MCPhysReg CopySrcReg = PredI->getOperand(1).getReg();
+ for (auto &KnownReg : KnownRegs) {
+ if (ClobberedRegs[KnownReg.Reg])
+ continue;
+          // If we have X = COPY Y, and Y is known to hold a constant, then X
+          // now holds the same constant.
+ if (CopySrcReg == KnownReg.Reg && !ClobberedRegs[CopyDstReg]) {
+ KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm));
+ if (SeenFirstUse)
+ FirstUse = PredI;
+ break;
+ }
+          // If we have X = COPY Y, and X is known to hold a constant, then Y
+          // holds the same constant.
+ if (CopyDstReg == KnownReg.Reg && !ClobberedRegs[CopySrcReg]) {
+ KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm));
+ if (SeenFirstUse)
+ FirstUse = PredI;
+ break;
+ }
+ }
+ }
+
+ // Stop if we get to the beginning of PredMBB.
+ if (PredI == PredMBB->begin())
+ break;
+
+ trackRegDefs(*PredI, ClobberedRegs, TRI);
+      // Stop if all of the known regs have been clobbered.
+ if (all_of(KnownRegs, [&](RegImm KnownReg) {
+ return ClobberedRegs[KnownReg.Reg];
+ }))
+ break;
+ }
+ break;
+
+ } while (Itr != PredMBB->begin() && Itr->isTerminator());
- // Remember all registers aliasing with TargetReg.
- SmallSetVector<unsigned, 8> TargetRegs;
- for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
- TargetRegs.insert(*AI);
+  // We've not found any register with a known value; time to bail out.
+ if (KnownRegs.empty())
+ return false;
bool Changed = false;
+ // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB.
+ SmallSetVector<unsigned, 4> UsedKnownRegs;
MachineBasicBlock::iterator LastChange = MBB->begin();
- unsigned SmallestDef = TargetReg;
- // Remove redundant Copy instructions unless TargetReg is modified.
+ // Remove redundant Copy instructions unless KnownReg is modified.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = &*I;
++I;
- if (MI->isCopy() && MI->getOperand(0).isReg() &&
- MI->getOperand(1).isReg()) {
-
- unsigned DefReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
-
- if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
- !MRI->isReserved(DefReg) &&
- (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
- DEBUG(dbgs() << "Remove redundant Copy : ");
- DEBUG((MI)->print(dbgs()));
-
- MI->eraseFromParent();
- Changed = true;
- LastChange = I;
- NumCopiesRemoved++;
- SmallestDef =
- TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
- continue;
+ bool RemovedMI = false;
+ bool IsCopy = MI->isCopy();
+ bool IsMoveImm = MI->isMoveImmediate();
+ if (IsCopy || IsMoveImm) {
+ MCPhysReg DefReg = MI->getOperand(0).getReg();
+ MCPhysReg SrcReg = IsCopy ? MI->getOperand(1).getReg() : 0;
+ int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0;
+ if (!MRI->isReserved(DefReg) &&
+ ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
+ IsMoveImm)) {
+ for (RegImm &KnownReg : KnownRegs) {
+ if (KnownReg.Reg != DefReg &&
+ !TRI->isSuperRegister(DefReg, KnownReg.Reg))
+ continue;
+
+ // For a copy, the known value must be a zero.
+ if (IsCopy && KnownReg.Imm != 0)
+ continue;
+
+ if (IsMoveImm) {
+ // For a move immediate, the known immediate must match the source
+ // immediate.
+ if (KnownReg.Imm != SrcImm)
+ continue;
+
+ // Don't remove a move immediate that implicitly defines the upper
+ // bits when only the lower 32 bits are known.
+ MCPhysReg CmpReg = KnownReg.Reg;
+ if (any_of(MI->implicit_operands(), [CmpReg](MachineOperand &O) {
+ return !O.isDead() && O.isReg() && O.isDef() &&
+ O.getReg() != CmpReg;
+ }))
+ continue;
+ }
+
+ if (IsCopy)
+ DEBUG(dbgs() << "Remove redundant Copy : " << *MI);
+ else
+ DEBUG(dbgs() << "Remove redundant Move : " << *MI);
+
+ MI->eraseFromParent();
+ Changed = true;
+ LastChange = I;
+ NumCopiesRemoved++;
+ UsedKnownRegs.insert(KnownReg.Reg);
+ RemovedMI = true;
+ break;
+ }
}
}
- if (MI->modifiesRegister(TargetReg, TRI))
+ // Skip to the next instruction if we removed the COPY/MovImm.
+ if (RemovedMI)
+ continue;
+
+    // Remove any regs the MI clobbers from the KnownRegs set.
+ for (unsigned RI = 0; RI < KnownRegs.size();)
+ if (MI->modifiesRegister(KnownRegs[RI].Reg, TRI)) {
+ std::swap(KnownRegs[RI], KnownRegs[KnownRegs.size() - 1]);
+ KnownRegs.pop_back();
+        // Don't increment RI since we now need to check the swapped-in
+        // KnownRegs[RI].
+ } else {
+ ++RI;
+ }
+
+ // Continue until the KnownRegs set is empty.
+ if (KnownRegs.empty())
break;
}
if (!Changed)
return false;
- // Otherwise, we have to fixup the use-def chain, starting with the
- // CBZ/CBNZ. Conservatively mark as much as we can live.
- CompBr->clearRegisterKills(SmallestDef, TRI);
+ // Add newly used regs to the block's live-in list if they aren't there
+ // already.
+ for (MCPhysReg KnownReg : UsedKnownRegs)
+ if (!MBB->isLiveIn(KnownReg))
+ MBB->addLiveIn(KnownReg);
- if (none_of(TargetRegs, [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
- MBB->addLiveIn(TargetReg);
-
- // Clear any kills of TargetReg between CompBr and the last removed COPY.
+ // Clear kills in the range where changes were made. This is conservative,
+ // but should be okay since kill markers are being phased out.
+ DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse
+ << "\tLastChange: " << *LastChange);
+ for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end()))
+ MMI.clearKillInfo();
for (MachineInstr &MMI : make_range(MBB->begin(), LastChange))
- MMI.clearRegisterKills(SmallestDef, TRI);
+ MMI.clearKillInfo();
return true;
}
@@ -168,6 +391,11 @@ bool AArch64RedundantCopyElimination::runOnMachineFunction(
return false;
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+
+ // Resize the clobber register bitfield tracker. We do this once per
+ // function and then clear the bitfield each time we optimize a copy.
+ ClobberedRegs.resize(TRI->getNumRegs());
+
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= optimizeCopy(&MBB);
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index b292c9c87dcd..20a5979f9b4b 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -1,4 +1,4 @@
-//===- AArch64RegisterBankInfo.cpp -------------------------------*- C++ -*-==//
+//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,24 @@
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
-#include "AArch64InstrInfo.h" // For XXXRegClassID.
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+
+#define GET_TARGET_REGBANK_IMPL
+#include "AArch64GenRegisterBank.inc"
// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"
@@ -31,7 +42,7 @@ using namespace llvm;
#endif
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
- : RegisterBankInfo(AArch64::RegBanks, AArch64::NumRegisterBanks) {
+ : AArch64GenRegisterBankInfo() {
static bool AlreadyInit = false;
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
@@ -78,44 +89,21 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
// Check that the TableGen'ed-like file is in sync with our expectations.
// First, the Idx.
- assert(AArch64::PartialMappingIdx::PMI_GPR32 ==
- AArch64::PartialMappingIdx::PMI_FirstGPR &&
- "GPR32 index not first in the GPR list");
- assert(AArch64::PartialMappingIdx::PMI_GPR64 ==
- AArch64::PartialMappingIdx::PMI_LastGPR &&
- "GPR64 index not last in the GPR list");
- assert(AArch64::PartialMappingIdx::PMI_FirstGPR <=
- AArch64::PartialMappingIdx::PMI_LastGPR &&
- "GPR list is backward");
- assert(AArch64::PartialMappingIdx::PMI_FPR32 ==
- AArch64::PartialMappingIdx::PMI_FirstFPR &&
- "FPR32 index not first in the FPR list");
- assert(AArch64::PartialMappingIdx::PMI_FPR512 ==
- AArch64::PartialMappingIdx::PMI_LastFPR &&
- "FPR512 index not last in the FPR list");
- assert(AArch64::PartialMappingIdx::PMI_FirstFPR <=
- AArch64::PartialMappingIdx::PMI_LastFPR &&
- "FPR list is backward");
- assert(AArch64::PartialMappingIdx::PMI_FPR32 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR64 &&
- AArch64::PartialMappingIdx::PMI_FPR64 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR128 &&
- AArch64::PartialMappingIdx::PMI_FPR128 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR256 &&
- AArch64::PartialMappingIdx::PMI_FPR256 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR512 &&
- "FPR indices not properly ordered");
+ assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
+ {PMI_GPR32, PMI_GPR64}) &&
+ "PartialMappingIdx's are incorrectly ordered");
+ assert(checkPartialMappingIdx(
+ PMI_FirstFPR, PMI_LastFPR,
+ {PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, PMI_FPR512}) &&
+ "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
do { \
- const PartialMapping &Map = \
- AArch64::PartMappings[AArch64::PartialMappingIdx::Idx - \
- AArch64::PartialMappingIdx::PMI_Min]; \
- (void)Map; \
- assert(Map.StartIdx == ValStartIdx && Map.Length == ValLength && \
- Map.RegBank == &RB && #Idx " is incorrectly initialized"); \
- } while (0)
+ assert( \
+ checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
+ #Idx " is incorrectly initialized"); \
+ } while (false)
CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
@@ -128,17 +116,11 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
do { \
- unsigned PartialMapBaseIdx = \
- AArch64::PartialMappingIdx::PMI_##RBName##Size - \
- AArch64::PartialMappingIdx::PMI_Min; \
- (void)PartialMapBaseIdx; \
- const ValueMapping &Map = AArch64::getValueMapping( \
- AArch64::PartialMappingIdx::PMI_First##RBName, Size)[Offset]; \
- (void)Map; \
- assert(Map.BreakDown == &AArch64::PartMappings[PartialMapBaseIdx] && \
- Map.NumBreakDowns == 1 && #RBName #Size \
- " " #Offset " is incorrectly initialized"); \
- } while (0)
+ assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \
+ PartialMappingIdx::PMI_First##RBName, Size, \
+ Offset) && \
+ #RBName #Size " " #Offset " is incorrectly initialized"); \
+ } while (false)
#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
@@ -157,7 +139,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
- } while (0)
+ } while (false)
CHECK_VALUEMAP_3OPS(GPR, 32);
CHECK_VALUEMAP_3OPS(GPR, 64);
@@ -169,24 +151,23 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
do { \
- unsigned PartialMapDstIdx = \
- AArch64::PMI_##RBNameDst##Size - AArch64::PMI_Min; \
- unsigned PartialMapSrcIdx = \
- AArch64::PMI_##RBNameSrc##Size - AArch64::PMI_Min; \
- (void) PartialMapDstIdx; \
- (void) PartialMapSrcIdx; \
- const ValueMapping *Map = AArch64::getCopyMapping( \
- AArch64::PMI_First##RBNameDst == AArch64::PMI_FirstGPR, \
- AArch64::PMI_First##RBNameSrc == AArch64::PMI_FirstGPR, Size); \
- (void) Map; \
- assert(Map[0].BreakDown == &AArch64::PartMappings[PartialMapDstIdx] && \
+ unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \
+ unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
+ (void)PartialMapDstIdx; \
+ (void)PartialMapSrcIdx; \
+ const ValueMapping *Map = getCopyMapping( \
+ AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \
+ (void)Map; \
+ assert(Map[0].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
Map[0].NumBreakDowns == 1 && #RBNameDst #Size \
" Dst is incorrectly initialized"); \
- assert(Map[1].BreakDown == &AArch64::PartMappings[PartialMapSrcIdx] && \
+ assert(Map[1].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \
" Src is incorrectly initialized"); \
\
- } while (0)
+ } while (false)
CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
@@ -280,12 +261,10 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
break;
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
- /*ID*/ 1, /*Cost*/ 1,
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
+ /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
/*NumOperands*/ 3);
InstructionMapping FPRMapping(
- /*ID*/ 2, /*Cost*/ 1,
- AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
+ /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
/*NumOperands*/ 3);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -305,21 +284,21 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
/*ID*/ 1, /*Cost*/ 1,
- AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ true, Size),
+ getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping FPRMapping(
/*ID*/ 2, /*Cost*/ 1,
- AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ false, Size),
+ getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping GPRToFPRMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
- AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ true, Size),
+ getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping FPRToGPRMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
- AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ false, Size),
+ getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -341,17 +320,15 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
/*ID*/ 1, /*Cost*/ 1,
- getOperandsMapping(
- {AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
- // Addresses are GPR 64-bit.
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
+ // Addresses are GPR 64-bit.
+ getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
InstructionMapping FPRMapping(
/*ID*/ 2, /*Cost*/ 1,
- getOperandsMapping(
- {AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
- // Addresses are GPR 64-bit.
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
+ // Addresses are GPR 64-bit.
+ getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -369,13 +346,12 @@ void AArch64RegisterBankInfo::applyMappingImpl(
switch (OpdMapper.getMI().getOpcode()) {
case TargetOpcode::G_OR:
case TargetOpcode::G_BITCAST:
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_LOAD:
    // Those IDs must match getInstrAlternativeMappings.
assert((OpdMapper.getInstrMapping().getID() >= 1 &&
OpdMapper.getInstrMapping().getID() <= 4) &&
"Don't know how to handle that ID");
return applyDefaultMapping(OpdMapper);
- }
default:
llvm_unreachable("Don't know how to handle that operation");
}
@@ -411,6 +387,8 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
unsigned Size = Ty.getSizeInBits();
bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
+ PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
+
#ifndef NDEBUG
// Make sure all the operands are using similar size and type.
// Should probably be checked by the machine verifier.
@@ -422,20 +400,19 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
// for each types.
for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
- assert(AArch64::getRegBankBaseIdxOffset(OpTy.getSizeInBits()) ==
- AArch64::getRegBankBaseIdxOffset(Size) &&
- "Operand has incompatible size");
+ assert(
+ AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
+ RBIdx, OpTy.getSizeInBits()) ==
+ AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
+ "Operand has incompatible size");
bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
(void)OpIsFPR;
assert(IsFPR == OpIsFPR && "Operand has incompatible type");
}
#endif // End NDEBUG.
- AArch64::PartialMappingIdx RBIdx =
- IsFPR ? AArch64::PMI_FirstFPR : AArch64::PMI_FirstGPR;
-
- return InstructionMapping{DefaultMappingID, 1,
- AArch64::getValueMapping(RBIdx, Size), NumOperands};
+ return InstructionMapping{DefaultMappingID, 1, getValueMapping(RBIdx, Size),
+ NumOperands};
}
RegisterBankInfo::InstructionMapping
@@ -485,9 +462,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
const RegisterBank &SrcRB =
SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
- return InstructionMapping{DefaultMappingID, copyCost(DstRB, SrcRB, Size),
- AArch64::getCopyMapping(DstIsGPR, SrcIsGPR, Size),
- /*NumOperands*/ 2};
+ return InstructionMapping{
+ DefaultMappingID, copyCost(DstRB, SrcRB, Size),
+ getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
+ /*NumOperands*/ 2};
}
case TargetOpcode::G_SEQUENCE:
// FIXME: support this, but the generic code is really not going to do
@@ -501,7 +479,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Track the size and bank of each register. We don't do partial mappings.
SmallVector<unsigned, 4> OpSize(NumOperands);
- SmallVector<AArch64::PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
auto &MO = MI.getOperand(Idx);
if (!MO.isReg())
@@ -513,9 +491,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
// For floating-point instructions, scalars go in FPRs.
if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc))
- OpRegBankIdx[Idx] = AArch64::PMI_FirstFPR;
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
else
- OpRegBankIdx[Idx] = AArch64::PMI_FirstGPR;
+ OpRegBankIdx[Idx] = PMI_FirstGPR;
}
unsigned Cost = 1;
@@ -523,49 +501,50 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// fine-tune the computed mapping.
switch (Opc) {
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP: {
- OpRegBankIdx = {AArch64::PMI_FirstFPR, AArch64::PMI_FirstGPR};
+ case TargetOpcode::G_UITOFP:
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
- }
case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI: {
- OpRegBankIdx = {AArch64::PMI_FirstGPR, AArch64::PMI_FirstFPR};
+ case TargetOpcode::G_FPTOUI:
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
- }
- case TargetOpcode::G_FCMP: {
- OpRegBankIdx = {AArch64::PMI_FirstGPR,
- /* Predicate */ AArch64::PMI_None, AArch64::PMI_FirstFPR,
- AArch64::PMI_FirstFPR};
+ case TargetOpcode::G_FCMP:
+ OpRegBankIdx = {PMI_FirstGPR,
+ /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
break;
- }
- case TargetOpcode::G_BITCAST: {
+ case TargetOpcode::G_BITCAST:
// This is going to be a cross register bank copy and this is expensive.
if (OpRegBankIdx[0] != OpRegBankIdx[1])
- Cost =
- copyCost(*AArch64::PartMappings[OpRegBankIdx[0]].RegBank,
- *AArch64::PartMappings[OpRegBankIdx[1]].RegBank, OpSize[0]);
+ Cost = copyCost(
+ *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
+ *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
+ OpSize[0]);
break;
- }
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_LOAD:
// Loading in vector unit is slightly more expensive.
// This is actually only true for the LD1R and co instructions,
// but anyway for the fast mode this number does not matter and
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
- if (OpRegBankIdx[0] >= AArch64::PMI_FirstFPR)
+ if (OpRegBankIdx[0] >= PMI_FirstFPR)
Cost = 2;
- }
+ break;
}
// Finally construct the computed mapping.
RegisterBankInfo::InstructionMapping Mapping =
InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
- for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
- if (MI.getOperand(Idx).isReg())
- OpdsMapping[Idx] =
- AArch64::getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ if (MI.getOperand(Idx).isReg()) {
+ auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
+ if (!Mapping->isValid())
+ return InstructionMapping();
+
+ OpdsMapping[Idx] = Mapping;
+ }
+ }
Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
return Mapping;
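
The deleted CHECK_PARTIALMAP body above spells out exactly what the new checkPartialMap helper has to verify, so its definition (it lives in the regenerated AArch64GenRegisterBankInfo.def, whose contents are not shown here) can be sketched from it. A minimal, hypothetical C++ sketch, assuming the helper sits next to the static PartMappings table:

// Hypothetical sketch only; not the actual AArch64GenRegisterBankInfo.def.
// It mirrors the assertion the removed CHECK_PARTIALMAP macro performed inline.
bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
                                                 unsigned ValStartIdx,
                                                 unsigned ValLength,
                                                 const RegisterBank &RB) {
  // Index the static table of partial mappings, offset by PMI_Min exactly as
  // the old macro did, and compare start index, length, and register bank.
  const RegisterBankInfo::PartialMapping &Map = PartMappings[Idx - PMI_Min];
  return Map.StartIdx == ValStartIdx && Map.Length == ValLength &&
         Map.RegBank == &RB;
}

The constructor keeps the human-readable assertion messages while the table arithmetic moves behind one reusable helper per check.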
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h
index f763235049d4..0a795a42c0b1 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -16,25 +16,78 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#define GET_REGBANK_DECLARATIONS
+#include "AArch64GenRegisterBank.inc"
+
namespace llvm {
class TargetRegisterInfo;
-namespace AArch64 {
-enum {
- GPRRegBankID = 0, /// General Purpose Registers: W, X.
- FPRRegBankID = 1, /// Floating Point/Vector Registers: B, H, S, D, Q.
- CCRRegBankID = 2, /// Conditional register: NZCV.
- NumRegisterBanks
-};
+class AArch64GenRegisterBankInfo : public RegisterBankInfo {
+protected:
+
+ enum PartialMappingIdx {
+ PMI_None = -1,
+ PMI_FPR32 = 1,
+ PMI_FPR64,
+ PMI_FPR128,
+ PMI_FPR256,
+ PMI_FPR512,
+ PMI_GPR32,
+ PMI_GPR64,
+ PMI_FirstGPR = PMI_GPR32,
+ PMI_LastGPR = PMI_GPR64,
+ PMI_FirstFPR = PMI_FPR32,
+ PMI_LastFPR = PMI_FPR512,
+ PMI_Min = PMI_FirstFPR,
+ };
+
+ static RegisterBankInfo::PartialMapping PartMappings[];
+ static RegisterBankInfo::ValueMapping ValMappings[];
+ static PartialMappingIdx BankIDToCopyMapIdx[];
+
+ enum ValueMappingIdx {
+ InvalidIdx = 0,
+ First3OpsIdx = 1,
+ Last3OpsIdx = 19,
+ DistanceBetweenRegBanks = 3,
+ FirstCrossRegCpyIdx = 22,
+ LastCrossRegCpyIdx = 34,
+ DistanceBetweenCrossRegCpy = 2
+ };
+
+ static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,
+ unsigned ValLength, const RegisterBank &RB);
+ static bool checkValueMapImpl(unsigned Idx, unsigned FirstInBank,
+ unsigned Size, unsigned Offset);
+ static bool checkPartialMappingIdx(PartialMappingIdx FirstAlias,
+ PartialMappingIdx LastAlias,
+ ArrayRef<PartialMappingIdx> Order);
-extern RegisterBank GPRRegBank;
-extern RegisterBank FPRRegBank;
-extern RegisterBank CCRRegBank;
-} // End AArch64 namespace.
+ static unsigned getRegBankBaseIdxOffset(unsigned RBIdx, unsigned Size);
+
+ /// Get the pointer to the ValueMapping representing the RegisterBank
+ /// at \p RBIdx with a size of \p Size.
+ ///
+ /// The returned mapping works for instructions with the same kind of
+ /// operands for up to 3 operands.
+ ///
+  /// \pre \p RBIdx != PartialMappingIdx::PMI_None
+ static const RegisterBankInfo::ValueMapping *
+ getValueMapping(PartialMappingIdx RBIdx, unsigned Size);
+
+ /// Get the pointer to the ValueMapping of the operands of a copy
+ /// instruction from the \p SrcBankID register bank to the \p DstBankID
+ /// register bank with a size of \p Size.
+ static const RegisterBankInfo::ValueMapping *
+ getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size);
+
+#define GET_TARGET_REGBANK_CLASS
+#include "AArch64GenRegisterBank.inc"
+};
/// This class provides the information for the target register banks.
-class AArch64RegisterBankInfo final : public RegisterBankInfo {
+class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
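
The ValueMappingIdx constants declared above (First3OpsIdx, Last3OpsIdx, DistanceBetweenRegBanks, InvalidIdx) hint at how the lookup behind getValueMapping can be laid out. The following is an illustrative C++ sketch rather than the actual AArch64GenRegisterBankInfo.def; it assumes getRegBankBaseIdxOffset reports an unsupported size by returning -1 and that each bank/size pair owns a run of DistanceBetweenRegBanks consecutive ValMappings entries:

// Illustrative sketch only; the real definition lives in the generated
// AArch64GenRegisterBankInfo.def and may differ in detail.
const RegisterBankInfo::ValueMapping *
AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
                                            unsigned Size) {
  assert(RBIdx != PMI_None && "No mapping needed for that");
  // Pick the per-size row within the bank (e.g. GPR32 vs. GPR64).
  unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
  if (BaseIdxOffset == unsigned(-1))
    // Unsupported size: hand back the invalid entry so callers can test
    // isValid(), as getInstrMapping does in the .cpp changes above.
    return &ValMappings[InvalidIdx];
  // Each bank/size pair owns DistanceBetweenRegBanks consecutive entries, one
  // per operand of a same-kind three-operand instruction.
  unsigned ValMappingIdx =
      First3OpsIdx +
      (RBIdx - PMI_Min + BaseIdxOffset) * DistanceBetweenRegBanks;
  assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx &&
         "Mapping out of bound");
  return &ValMappings[ValMappingIdx];
}

With the enum values shown, this layout is at least self-consistent: getValueMapping(PMI_FirstGPR, 64) would land on 1 + (6 - 1 + 1) * 3 = 19, which is Last3OpsIdx, and the next free slot, 22, matches FirstCrossRegCpyIdx.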
diff --git a/lib/Target/AArch64/AArch64RegisterBanks.td b/lib/Target/AArch64/AArch64RegisterBanks.td
new file mode 100644
index 000000000000..c2b6c0b04e9b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterBanks.td
@@ -0,0 +1,20 @@
+//=- AArch64RegisterBanks.td - Describe the AArch64 Banks ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers: W, X.
+def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
+
+/// Floating Point/Vector Registers: B, H, S, D, Q.
+def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
+
+/// Conditional register: NZCV.
+def CCRRegBank : RegisterBank<"CCR", [CCR]>;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 98fad71aa18a..baf15ac540cf 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -118,25 +118,17 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
- markSuperRegs(Reserved, AArch64::SP);
- markSuperRegs(Reserved, AArch64::XZR);
markSuperRegs(Reserved, AArch64::WSP);
markSuperRegs(Reserved, AArch64::WZR);
- if (TFI->hasFP(MF) || TT.isOSDarwin()) {
- markSuperRegs(Reserved, AArch64::FP);
+ if (TFI->hasFP(MF) || TT.isOSDarwin())
markSuperRegs(Reserved, AArch64::W29);
- }
- if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) {
- markSuperRegs(Reserved, AArch64::X18); // Platform register
- markSuperRegs(Reserved, AArch64::W18);
- }
+ if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ markSuperRegs(Reserved, AArch64::W18); // Platform register
- if (hasBasePointer(MF)) {
- markSuperRegs(Reserved, AArch64::X19);
+ if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
- }
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index 93ca079275c8..18d000ace94c 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -13,7 +13,7 @@
// ===---------------------------------------------------------------------===//
// The following definitions describe the simpler per-operand machine model.
-// This works with MachineScheduler. See MCSchedModel.h for details.
+// This works with MachineScheduler. See MCSchedule.h for details.
// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
def CortexA53Model : SchedMachineModel {
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 99c48d0146e4..303398ea0b7f 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -162,7 +162,9 @@ def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
// Cryptography Extensions
// -----------------------------------------------------------------------------
-def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>;
+def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>;
def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td
index 19a6d6f2a1ad..eec089087fe0 100644
--- a/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -17,10 +17,112 @@
// instruction cost model.
def FalkorModel : SchedMachineModel {
- let IssueWidth = 4; // 4-wide issue for expanded uops.
+ let IssueWidth = 8; // 8 uops are dispatched per cycle.
let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer.
let LoopMicroOpBufferSize = 16;
let LoadLatency = 3; // Optimistic load latency.
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
- let CompleteModel = 0;
+ let CompleteModel = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Falkor.
+
+let SchedModel = FalkorModel in {
+
+ def FalkorUnitB : ProcResource<1>; // Branch
+ def FalkorUnitLD : ProcResource<1>; // Load pipe
+ def FalkorUnitSD : ProcResource<1>; // Store data
+ def FalkorUnitST : ProcResource<1>; // Store pipe
+ def FalkorUnitX : ProcResource<1>; // Complex arithmetic
+ def FalkorUnitY : ProcResource<1>; // Simple arithmetic
+ def FalkorUnitZ : ProcResource<1>; // Simple arithmetic
+
+ def FalkorUnitVSD : ProcResource<1>; // Vector store data
+ def FalkorUnitVX : ProcResource<1>; // Vector X-pipe
+ def FalkorUnitVY : ProcResource<1>; // Vector Y-pipe
+
+ def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector
+ def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar
+
+ // Define the resource groups.
+ def FalkorUnitXY : ProcResGroup<[FalkorUnitX, FalkorUnitY]>;
+ def FalkorUnitXYZ : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>;
+ def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ,
+ FalkorUnitB]>;
+ def FalkorUnitZB : ProcResGroup<[FalkorUnitZ, FalkorUnitB]>;
+ def FalkorUnitVXVY : ProcResGroup<[FalkorUnitVX, FalkorUnitVY]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Falkor.
+
+let SchedModel = FalkorModel in {
+
+def : WriteRes<WriteImm, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteI, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 1; let NumMicroOps = 2; }
+def : WriteRes<WriteIEReg, [FalkorUnitXYZ, FalkorUnitXYZ]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteExtr, [FalkorUnitXYZ, FalkorUnitXYZ]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteIS, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteID32, [FalkorUnitX, FalkorUnitZ]>
+ { let Latency = 8; let NumMicroOps = 2; }
+def : WriteRes<WriteID64, [FalkorUnitX, FalkorUnitZ]>
+ { let Latency = 16; let NumMicroOps = 2; }
+def : WriteRes<WriteIM32, [FalkorUnitX]> { let Latency = 4; }
+def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
+def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
+def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }
+def : WriteRes<WriteST, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 3; let NumMicroOps = 3; }
+def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
+def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 5; }
+def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
+def : WriteRes<WriteSTIdx, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 4; let NumMicroOps = 3; }
+def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 3; let NumMicroOps = 2; }
+def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; }
+def : WriteRes<WriteFCvt, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFMul, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 6; let NumMicroOps = 2; }
+def : WriteRes<WriteFDiv, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
+def : WriteRes<WriteV, [FalkorUnitVXVY]> { let Latency = 6; }
+def : WriteRes<WriteVLD, [FalkorUnitLD]> { let Latency = 3; }
+def : WriteRes<WriteVST, [FalkorUnitST, FalkorUnitVSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 3; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// No forwarding logic is modelled yet.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// Detailed Refinements
+// -----------------------------------------------------------------------------
+include "AArch64SchedFalkorDetails.td"
+
}
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
new file mode 100644
index 000000000000..6bce4ef6b652
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -0,0 +1,523 @@
+//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the uop and latency details for the machine model for the
+// Qualcomm Falkor subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+include "AArch64SchedFalkorWriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the earlier mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// SIMD Floating-point Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FMULX16, FMULX32)>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FMULX64)>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
+
+def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>;
+
+def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_2VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v2i64_indexed$")>;
+// SIMD Integer Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs ADDPv2i64p)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQRDML(A|S)?H(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>;
+
+def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)(i16|i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)v.*$")>;
+// SIMD Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD], (instrs LD2i64)>;
+def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
+
+// SIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>;
+
+def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
+
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>;
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>;
+
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>;
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
+
+// SIMD Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
+def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
+
+def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
+def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>;
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>;
+def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
+def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
+def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>;
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hi|Hr|S0|Si|Sr|D0|Di|Dr|v.*_ns)$")>;
+
+def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>;
+
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
+
+def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>;
+def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>;
+
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>;
+def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
+
+def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>;
+def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>;
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
+def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>;
+def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>;
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1X_4cyc], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+def : InstRW<[FalkorWr_1X_4cyc], (instregex "^M(ADD|SUB)Wrrr$")>;
+
+def : InstRW<[FalkorWr_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
+def : InstRW<[FalkorWr_1X_5cyc], (instregex "^M(ADD|SUB)Xrrr$")>;
+
+def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)(MLAL|MLSL|MULL)v.*$")>;
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
+def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
+
+// Other Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>;
+def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
+
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>;
+
+def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
+
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
+def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>;
+def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
+
+def : InstRW<[WriteST], (instregex "^LDC.*$")>;
+def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>;
+def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>;
+
+def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>;
+def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>;
+def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>;
+def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>;
+def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>;
+def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
+def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>;
+def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;
+
+def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>;
diff --git a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
new file mode 100644
index 000000000000..9cdb4be4246b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
@@ -0,0 +1,361 @@
+//=- AArch64SchedFalkorWriteRes.td - Falkor Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Falkor specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: FalkorWr
+// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
+// Latency: #cyc
+//
+// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
+// down one Z pipe, six SD pipes, four VX pipes and the total latency is
+// six cycles.
+//
+// Contains all of the Falkor specific ReadAdvance types for forwarding logic.
+//
+// Contains all of the Falkor specific WriteVariant types for immediate zero
+// and LSLFast.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define 1 micro-op types
+
+
+def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
+def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
+def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
+def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
+def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
+def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
+def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
+def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
+def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
+def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
+def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
+def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
+def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
+def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
+def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
+def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
+def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
+
+def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
+def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
+def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
+
+def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
+def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
+def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Define 2 micro-op types
+
+def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 8;
+ let ResourceCycles = [2, 8];
+}
+
+def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 16;
+ let ResourceCycles = [2, 16];
+}
+
+def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 3 micro-op types
+
+def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 4 micro-op types
+
+def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 5 micro-op types
+
+def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 6 micro-op types
+
+def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 8 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 9 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitXYZ,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitXYZ,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+// Forwarding logic is modeled for vector and FP multiply-accumulate.
+// -----------------------------------------------------------------------------
+def FalkorReadVMA : SchedReadAdvance<2, [FalkorWr_1VXVY_4cyc,
+ FalkorWr_2VXVY_4cyc]>;
+def FalkorReadFMA : SchedReadAdvance<3, [FalkorWr_1VXVY_5cyc,
+ FalkorWr_1VXVY_6cyc,
+ FalkorWr_2VXVY_5cyc,
+ FalkorWr_2VXVY_6cyc]>;
+
+// SchedPredicates and WriteVariants for Immediate Zero and LSLFast
+// -----------------------------------------------------------------------------
+def FalkorImmZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>;
+def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>;
+
+def FalkorWr_FMOV : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
+
+def FalkorWr_MOVZ : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
+
+def FalkorWr_LDR : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
+
+def FalkorWr_ADD : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<FalkorImmZPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
+
+def FalkorWr_PRFM : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1ST_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
+
+def FalkorWr_LDRS : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_4cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
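The WriteVariants above only take effect once they are mapped onto opcodes;
those mappings live in the companion AArch64SchedFalkorDetails.td. A minimal
sketch of such a binding, with the opcode choice shown here as an assumption
rather than a quote from the patch:

// A MOVZ that materializes zero satisfies FalkorImmZPred and is treated as
// free; any other MOVZ falls through to the one-cycle XYZB variant.
def : InstRW<[FalkorWr_MOVZ], (instrs MOVZWi, MOVZXi)>;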
diff --git a/lib/Target/AArch64/AArch64SchedKryoDetails.td b/lib/Target/AArch64/AArch64SchedKryoDetails.td
index 426ae6103e4b..02cccccd3078 100644
--- a/lib/Target/AArch64/AArch64SchedKryoDetails.td
+++ b/lib/Target/AArch64/AArch64SchedKryoDetails.td
@@ -776,23 +776,29 @@ def KryoWrite_4cyc_X_X_115ln :
}
def : InstRW<[KryoWrite_4cyc_X_X_115ln],
(instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>;
-def KryoWrite_1cyc_XA_Y_noRSV_43ln :
+def KryoWrite_10cyc_XA_Y_noRSV_43ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 10; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_43ln],
- (instrs FDIVDrr, FDIVSrr)>;
-def KryoWrite_1cyc_XA_Y_noRSV_121ln :
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVSrr)>;
+def KryoWrite_14cyc_XA_Y_noRSV_43ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 14; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_121ln],
+def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVDrr)>;
+def KryoWrite_10cyc_XA_Y_noRSV_121ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 10; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln],
(instrs FDIVv2f32)>;
-def KryoWrite_1cyc_XA_Y_XA_Y_123ln :
+def KryoWrite_14cyc_XA_Y_XA_Y_123ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 4;
+ let Latency = 14; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_123ln],
+def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln],
(instrs FDIVv2f64, FDIVv4f32)>;
def KryoWrite_5cyc_X_noRSV_55ln :
SchedWriteRes<[KryoUnitX]> {
@@ -968,24 +974,36 @@ def KryoWrite_2cyc_XY_XY_109ln :
}
def : InstRW<[KryoWrite_2cyc_XY_XY_109ln],
(instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>;
-def KryoWrite_1cyc_XA_Y_noRSV_42ln :
+def KryoWrite_12cyc_XA_Y_noRSV_42ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 12; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_42ln],
- (instregex "FSQRT(S|D)r")>;
-def KryoWrite_1cyc_XA_Y_noRSV_120ln :
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTSr)>;
+def KryoWrite_21cyc_XA_Y_noRSV_42ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 21; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTDr)>;
+def KryoWrite_12cyc_XA_Y_noRSV_120ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 12; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln],
+ (instrs FSQRTv2f32)>;
+def KryoWrite_21cyc_XA_Y_XA_Y_122ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
+ let Latency = 21; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_120ln],
- (instregex "FSQRTv2f32")>;
-def KryoWrite_1cyc_XA_Y_XA_Y_122ln :
+def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv4f32)>;
+def KryoWrite_36cyc_XA_Y_XA_Y_122ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 4;
+ let Latency = 36; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_122ln],
- (instregex "FSQRT(v2f64|v4f32)")>;
+def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv2f64)>;
def KryoWrite_1cyc_X_201ln :
SchedWriteRes<[KryoUnitX]> {
let Latency = 1; let NumMicroOps = 1;
diff --git a/lib/Target/AArch64/AArch64SchedM1.td b/lib/Target/AArch64/AArch64SchedM1.td
index 14d6891253fa..3fbbc0be682d 100644
--- a/lib/Target/AArch64/AArch64SchedM1.td
+++ b/lib/Target/AArch64/AArch64SchedM1.td
@@ -366,7 +366,8 @@ def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
// Cryptography instructions.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
-def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>;
+def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
+def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td
new file mode 100644
index 000000000000..9a0cb702518d
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -0,0 +1,352 @@
+//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Cavium ThunderX T8X
+// (T88, T81, T83) processors.
+// Loosely based on the Cortex-A53 model, which is a similar in-order core.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Cavium ThunderX T8X scheduling machine model.
+def ThunderXT8XModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops dispatched per cycle.
+ let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order.
+ let LoadLatency = 3; // Optimistic load latency.
+ let MispredictPenalty = 8; // Branch mispredict penalty.
+ let PostRAScheduler = 1; // Use PostRA scheduler.
+ let CompleteModel = 1;
+}
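A scheduling model only applies to CPUs that select it; that wiring lives in
AArch64.td via ProcessorModel. A rough sketch of the hookup, with the CPU name
and feature list given purely as assumptions:

def : ProcessorModel<"thunderxt88", ThunderXT8XModel,
                     [FeatureCRC, FeatureCrypto, FeatureNEON]>;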
+
+// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
+def THXT8XUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def THXT8XUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def THXT8XUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def THXT8XUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def THXT8XUnitBr : ProcResource<1> { let BufferSize = 0; } // Branch
+def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
+def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types mapping the ProcResources and
+// latencies.
+
+let SchedModel = ThunderXT8XModel in {
+
+// ALU
+def : WriteRes<WriteImm, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteI, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIS, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; }
+
+// MAC
+def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+// Div
+def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
+ let Latency = 12;
+ let ResourceCycles = [6];
+}
+
+def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
+ let Latency = 14;
+ let ResourceCycles = [8];
+}
+
+// Load
+def : WriteRes<WriteLD, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; }
+
+// Vector Load
+def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
+ let Latency = 8;
+ let ResourceCycles = [3];
+}
+
+def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 6;
+ let ResourceCycles = [1];
+}
+
+def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [7];
+}
+
+def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 12;
+ let ResourceCycles = [8];
+}
+
+def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
+
+// Pre/Post Indexing
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTX, [THXT8XUnitLdSt]> { let Latency = 1; }
+
+// Vector Store
+def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
+def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;
+
+def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 10;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [10];
+}
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [THXT8XUnitBr]>;
+def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>;
+def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
+def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
+def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
+def : WriteRes<WriteSys, [THXT8XUnitBr]>;
+def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
+def : WriteRes<WriteHint, [THXT8XUnitBr]>;
+
+// FP ALU
+def : WriteRes<WriteF, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }
+
+def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 12;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 17;
+ let ResourceCycles = [14];
+}
+
+def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 31;
+ let ResourceCycles = [28];
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// Operands for these reads are not needed until one or two cycles after issue.
+def : ReadAdvance<ReadExtrHi, 1>;
+def : ReadAdvance<ReadAdrBase, 2>;
+def : ReadAdvance<ReadVLD, 2>;
+
+// FIXME: This needs more targeted benchmarking.
+// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
+// operands are needed one cycle later if and only if they are to be
+// shifted. Otherwise, they too are needed two cycles later. This same
+// ReadAdvance applies to Extended registers as well, even though there is
+// a separate SchedPredicate for them.
+def : ReadAdvance<ReadI, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, THXT8XReadISReg>;
+
+def THXT8XReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
+
+// MAC - Operands are generally needed one cycle later in the MAC pipe.
+// Accumulator operands are needed two cycles later.
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRW.
+
+//---
+// Branch
+//---
+def : InstRW<[THXT8XWriteBR], (instregex "^B")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^BL")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^B.*")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BR")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BLR")>;
+
+//---
+// Ret
+//---
+def : InstRW<[THXT8XWriteRET], (instregex "^RET")>;
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[WriteI], (instrs COPY)>;
+
+//---
+// Vector Loads
+//---
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+//---
+// Floating Point MAC, DIV, SQRT
+//---
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+}
diff --git a/lib/Target/AArch64/AArch64SchedVulcan.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 35a40c314bf4..3654eeca530a 100644
--- a/lib/Target/AArch64/AArch64SchedVulcan.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,4 +1,4 @@
-//=- AArch64SchedVulcan.td - Vulcan Scheduling Defs ----------*- tablegen -*-=//
+//=- AArch64SchedThunderX2T99.td - Cavium ThunderX2 T99 Scheduling --*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -6,23 +6,23 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// 1. Introduction
//
-// This file defines the machine model for Broadcom Vulcan to support
-// instruction scheduling and other instruction cost heuristics.
+// This file defines the scheduling model for Cavium ThunderX2T99
+// processors.
+// Based on Broadcom Vulcan.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// 2. Pipeline Description.
-def VulcanModel : SchedMachineModel {
+def ThunderX2T99Model : SchedMachineModel {
let IssueWidth = 4; // 4 micro-ops dispatched at a time.
let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
// Determined via a mix of micro-arch details and experimentation.
- let LoopMicroOpBufferSize = 32;
+ let LoopMicroOpBufferSize = 32;
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
}
@@ -30,155 +30,155 @@ def VulcanModel : SchedMachineModel {
// Define the issue ports.
// Port 0: ALU, FP/SIMD.
-def VulcanP0 : ProcResource<1>;
+def THX2T99P0 : ProcResource<1>;
// Port 1: ALU, FP/SIMD, integer mul/div.
-def VulcanP1 : ProcResource<1>;
+def THX2T99P1 : ProcResource<1>;
// Port 2: ALU, Branch.
-def VulcanP2 : ProcResource<1>;
+def THX2T99P2 : ProcResource<1>;
// Port 3: Store data.
-def VulcanP3 : ProcResource<1>;
+def THX2T99P3 : ProcResource<1>;
// Port 4: Load/store.
-def VulcanP4 : ProcResource<1>;
+def THX2T99P4 : ProcResource<1>;
// Port 5: Load/store.
-def VulcanP5 : ProcResource<1>;
+def THX2T99P5 : ProcResource<1>;
-let SchedModel = VulcanModel in {
+let SchedModel = ThunderX2T99Model in {
// Define groups for the functional units on each issue port. Each group
// created will be used by a WriteRes later on.
//
// NOTE: Some groups only contain one member. This is a way to create names for
// the various functional units that share a single issue port. For example,
-// "VulcanI1" for ALU ops on port 1 and "VulcanF1" for FP ops on port 1.
+// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1.
// Integer divide and multiply micro-ops only on port 1.
-def VulcanI1 : ProcResGroup<[VulcanP1]>;
+def THX2T99I1 : ProcResGroup<[THX2T99P1]>;
// Branch micro-ops only on port 2.
-def VulcanI2 : ProcResGroup<[VulcanP2]>;
+def THX2T99I2 : ProcResGroup<[THX2T99P2]>;
// ALU micro-ops on ports 0, 1, and 2.
-def VulcanI012 : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2]>;
+def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>;
// Crypto FP/SIMD micro-ops only on port 1.
-def VulcanF1 : ProcResGroup<[VulcanP1]>;
+def THX2T99F1 : ProcResGroup<[THX2T99P1]>;
// FP/SIMD micro-ops on ports 0 and 1.
-def VulcanF01 : ProcResGroup<[VulcanP0, VulcanP1]>;
+def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>;
// Store data micro-ops only on port 3.
-def VulcanSD : ProcResGroup<[VulcanP3]>;
+def THX2T99SD : ProcResGroup<[THX2T99P3]>;
// Load/store micro-ops on ports 4 and 5.
-def VulcanLS01 : ProcResGroup<[VulcanP4, VulcanP5]>;
+def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>;
// 60 entry unified scheduler.
-def VulcanAny : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2,
- VulcanP3, VulcanP4, VulcanP5]> {
+def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2,
+ THX2T99P3, THX2T99P4, THX2T99P5]> {
let BufferSize=60;
}
// Define commonly used write types for InstRW specializations.
-// All definitions follow the format: VulcanWrite_<NumCycles>Cyc_<Resources>.
+// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>.
// 3 cycles on I1.
-def VulcanWrite_3Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 3; }
+def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; }
// 4 cycles on I1.
-def VulcanWrite_4Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 4; }
+def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; }
// 1 cycle on I0, I1, or I2.
-def VulcanWrite_1Cyc_I012 : SchedWriteRes<[VulcanI012]> { let Latency = 1; }
+def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; }
// 5 cycles on F1.
-def VulcanWrite_5Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; }
// 7 cycles on F1.
-def VulcanWrite_7Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; }
// 4 cycles on F0 or F1.
-def VulcanWrite_4Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; }
// 5 cycles on F0 or F1.
-def VulcanWrite_5Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; }
// 6 cycles on F0 or F1.
-def VulcanWrite_6Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; }
// 7 cycles on F0 or F1.
-def VulcanWrite_7Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; }
// 8 cycles on F0 or F1.
-def VulcanWrite_8Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 8; }
+def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; }
// 16 cycles on F0 or F1.
-def VulcanWrite_16Cyc_F01 : SchedWriteRes<[VulcanF01]> {
+def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
}
// 23 cycles on F0 or F1.
-def VulcanWrite_23Cyc_F01 : SchedWriteRes<[VulcanF01]> {
+def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
let ResourceCycles = [11];
}
// 1 cycle on LS0 or LS1.
-def VulcanWrite_1Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 1; }
+def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; }
// 4 cycles on LS0 or LS1.
-def VulcanWrite_4Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; }
// 5 cycles on LS0 or LS1.
-def VulcanWrite_5Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; }
// 6 cycles on LS0 or LS1.
-def VulcanWrite_6Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; }
// 5 cycles on LS0 or LS1 and I0, I1, or I2.
-def VulcanWrite_5Cyc_LS01_I012 : SchedWriteRes<[VulcanLS01, VulcanI012]> {
+def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
let Latency = 5;
let NumMicroOps = 2;
}
// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
-def VulcanWrite_6Cyc_LS01_I012_I012 :
- SchedWriteRes<[VulcanLS01, VulcanI012, VulcanI012]> {
+def THX2T99Write_6Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
let Latency = 6;
let NumMicroOps = 3;
}
// 1 cycle on LS0 or LS1 and F0 or F1.
-def VulcanWrite_1Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 1;
let NumMicroOps = 2;
}
// 5 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_5Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 5;
let NumMicroOps = 2;
}
// 6 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_6Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 6;
let NumMicroOps = 2;
}
// 7 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_7Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 2;
}
// 8 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_8Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 8;
let NumMicroOps = 2;
}
@@ -202,7 +202,7 @@ def : ReadAdvance<ReadVLD, 0>;
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
-let SchedModel = VulcanModel in {
+let SchedModel = ThunderX2T99Model in {
//---
// 3.1 Branch Instructions
@@ -211,7 +211,7 @@ let SchedModel = VulcanModel in {
// Branch, immed
// Branch and link, immed
// Compare and branch
-def : WriteRes<WriteBr, [VulcanI2]> { let Latency = 1; }
+def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; }
def : WriteRes<WriteSys, []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
@@ -222,7 +222,7 @@ def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
-def : WriteRes<WriteBrReg, [VulcanI2]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; }
//---
// 3.2 Arithmetic and Logical Instructions
@@ -233,25 +233,25 @@ def : WriteRes<WriteBrReg, [VulcanI2]> { let Latency = 1; }
// Conditional compare
// Conditional select
// Address generation
-def : WriteRes<WriteI, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; }
def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
-def : WriteRes<WriteISReg, [VulcanI012]> {
+def : WriteRes<WriteISReg, [THX2T99I012]> {
let Latency = 2;
let ResourceCycles = [2];
}
-def : WriteRes<WriteIEReg, [VulcanI012]> {
+def : WriteRes<WriteIEReg, [THX2T99I012]> {
let Latency = 2;
let ResourceCycles = [2];
}
// Move immed
-def : WriteRes<WriteImm, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; }
// Variable shift
-def : WriteRes<WriteIS, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; }
//---
// 3.4 Divide and Multiply Instructions
@@ -259,33 +259,33 @@ def : WriteRes<WriteIS, [VulcanI012]> { let Latency = 1; }
// Divide, W-form
// Latency range of 13-23. Take the average.
-def : WriteRes<WriteID32, [VulcanI1]> {
+def : WriteRes<WriteID32, [THX2T99I1]> {
let Latency = 18;
let ResourceCycles = [18];
}
// Divide, X-form
// Latency range of 13-39. Take the average.
-def : WriteRes<WriteID64, [VulcanI1]> {
+def : WriteRes<WriteID64, [THX2T99I1]> {
let Latency = 26;
let ResourceCycles = [26];
}
// Multiply accumulate, W-form
-def : WriteRes<WriteIM32, [VulcanI012]> { let Latency = 5; }
+def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; }
// Multiply accumulate, X-form
-def : WriteRes<WriteIM64, [VulcanI012]> { let Latency = 5; }
+def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; }
// Bitfield extract, two reg
-def : WriteRes<WriteExtr, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; }
// Bitfield move, basic
// Bitfield move, insert
// NOTE: Handled by WriteIS.
// Count leading
-def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$",
+def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
"^CLZ(W|X)r$")>;
// Reverse bits/bytes
@@ -300,13 +300,13 @@ def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$",
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
-def : WriteRes<WriteLD, [VulcanLS01]> { let Latency = 4; }
+def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; }
// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
-def : WriteRes<WriteAdr, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; }
// Load register offset, basic
// Load register, register offset, scale by 4/8
@@ -314,15 +314,15 @@ def : WriteRes<WriteAdr, [VulcanI012]> { let Latency = 1; }
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
-def VulcanWriteLDIdx : SchedWriteVariant<[
- SchedVar<ScaledIdxPred, [VulcanWrite_6Cyc_LS01_I012_I012]>,
- SchedVar<NoSchedPred, [VulcanWrite_5Cyc_LS01_I012]>]>;
-def : SchedAlias<WriteLDIdx, VulcanWriteLDIdx>;
+def THX2T99WriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [THX2T99Write_6Cyc_LS01_I012_I012]>,
+ SchedVar<NoSchedPred, [THX2T99Write_5Cyc_LS01_I012]>]>;
+def : SchedAlias<WriteLDIdx, THX2T99WriteLDIdx>;
-def VulcanReadAdrBase : SchedReadVariant<[
+def THX2T99ReadAdrBase : SchedReadVariant<[
SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
-def : SchedAlias<ReadAdrBase, VulcanReadAdrBase>;
+def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>;
// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
@@ -347,7 +347,7 @@ def : WriteRes<WriteLDHi, []> {
// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
-def : WriteRes<WriteST, [VulcanLS01, VulcanSD]> {
+def : WriteRes<WriteST, [THX2T99LS01, THX2T99SD]> {
let Latency = 1;
let NumMicroOps = 2;
}
@@ -364,14 +364,14 @@ def : WriteRes<WriteST, [VulcanLS01, VulcanSD]> {
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
-def : WriteRes<WriteSTIdx, [VulcanLS01, VulcanSD, VulcanI012]> {
+def : WriteRes<WriteSTIdx, [THX2T99LS01, THX2T99SD, THX2T99I012]> {
let Latency = 1;
let NumMicroOps = 3;
}
// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
-def : WriteRes<WriteSTP, [VulcanLS01, VulcanSD]> {
+def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
let Latency = 1;
let NumMicroOps = 2;
}
@@ -389,35 +389,35 @@ def : WriteRes<WriteSTP, [VulcanLS01, VulcanSD]> {
// FP absolute value
// FP min/max
// FP negate
-def : WriteRes<WriteF, [VulcanF01]> { let Latency = 5; }
+def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; }
// FP arithmetic
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
// FP compare
-def : WriteRes<WriteFCmp, [VulcanF01]> { let Latency = 5; }
+def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; }
// FP divide, S-form
// FP square root, S-form
-def : WriteRes<WriteFDiv, [VulcanF01]> {
+def : WriteRes<WriteFDiv, [THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
}
// FP divide, D-form
// FP square root, D-form
-def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
// FP multiply
// FP multiply accumulate
-def : WriteRes<WriteFMul, [VulcanF01]> { let Latency = 6; }
+def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; }
// FP round to integral
-def : InstRW<[VulcanWrite_7Cyc_F01],
+def : InstRW<[THX2T99Write_7Cyc_F01],
(instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
// FP select
-def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>;
+def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>;
//---
// 3.9 FP Miscellaneous Instructions
@@ -426,16 +426,16 @@ def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>;
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
-def : WriteRes<WriteFCvt, [VulcanF01]> { let Latency = 7; }
+def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; }
// FP move, immed
// FP move, register
-def : WriteRes<WriteFImm, [VulcanF01]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; }
// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
-def : WriteRes<WriteFCopy, [VulcanF01]> { let Latency = 4; }
-def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
+def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; }
+def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
//---
// 3.12 ASIMD Integer Instructions
@@ -470,39 +470,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [VulcanF01]> { let Latency = 7; }
+def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; }
// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD logical (MOV, MVN, ORN, ORR)
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
// ASIMD polynomial (8x8) multiply long
-def : InstRW<[VulcanWrite_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
//---
// 3.13 ASIMD Floating-point Instructions
//---
// ASIMD FP absolute value
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FABSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>;
// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
// ASIMD FP arith,pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADDPv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>;
// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
"^FCMGTv", "^FCMLEv",
"^FCMLTv")>;
@@ -513,42 +513,42 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
// NOTE: Handled by WriteV.
// ASIMD FP divide, D-form, F32
-def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv2f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>;
// ASIMD FP divide, Q-form, F32
-def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv4f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>;
// ASIMD FP divide, Q-form, F64
-def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVv2f64)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>;
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
"^FMINv", "^FMINNMv")>;
// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
"^FMINPv", "^FMINNMPv")>;
// ASIMD FP max/min, reduce
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
"^FMINVv", "^FMINNMVv")>;
// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
// ASIMD FP negate
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>;
// ASIMD FP round, D-form
// ASIMD FP round, Q-form
@@ -559,39 +559,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>;
//--
// ASIMD bit reverse
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^RBITv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
// ASIMD count, D-form
// ASIMD count, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^DUPv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
// ASIMD extract
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^EXTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>;
// ASIMD extract narrow
// ASIMD extract narrow, saturating
// NOTE: Handled by WriteV.
// ASIMD insert, element to element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD move, integer immed
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
// ASIMD move, FP immed
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
"^FRSQRTEv", "^URSQRTEv")>;
@@ -599,31 +599,31 @@ def : InstRW<[VulcanWrite_5Cyc_F01],
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
// ASIMD reverse
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^REV16v", "^REV32v", "^REV64v")>;
// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
-def : InstRW<[VulcanWrite_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
+def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
// ASIMD transfer, element to word or dword
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^UMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>;
// ASIMD transfer, element to gen reg
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
// ASIMD transfer gen reg to element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD transpose
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
"^UZP1v", "^UZP2v")>;
// ASIMD unzip/zip
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
//--
// 3.15 ASIMD Load Instructions
@@ -631,114 +631,114 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[VulcanWrite_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[VulcanWrite_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01],
+def : InstRW<[THX2T99Write_5Cyc_LS01],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[VulcanWrite_6Cyc_LS01],
+def : InstRW<[THX2T99Write_6Cyc_LS01],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//--
@@ -747,82 +747,82 @@ def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST1i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4i(8|16|32|64)_POST$")>;
//--
@@ -830,23 +830,23 @@ def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
//--
// Crypto AES ops
-def : InstRW<[VulcanWrite_5Cyc_F1], (instregex "^AES")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>;
// Crypto polynomial (64x64) multiply long
-def : InstRW<[VulcanWrite_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
// Crypto SHA1 xor ops
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration op (1 u-op)
// Crypto SHA256 schedule acceleration op (2 u-ops)
// Crypto SHA256 hash acceleration ops
-def : InstRW<[VulcanWrite_7Cyc_F1], (instregex "^SHA")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>;
//--
// 3.18 CRC
//--
// CRC checksum ops
-def : InstRW<[VulcanWrite_4Cyc_I1], (instregex "^CRC32")>;
+def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>;
-} // SchedModel = VulcanModel
+} // SchedModel = ThunderX2T99Model
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 66a8f332513a..7f5507371fa0 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -42,10 +42,12 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
Entry.Node = Size;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args))
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ std::move(Args))
+ .setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
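The builder chain above assembles the call to the external bzero entry point (now via setLibCallee) and discards its result. Conceptually, a memset of constant zero is routed to a plain bzero-style library call on targets that expose such a symbol. A minimal stand-in, where bzero_like is my own placeholder for that external symbol and not an LLVM API:

#include <cstddef>
#include <cstring>

// bzero_like stands in for the target's bzero entry point; an assumption
// for illustration only, not part of the patch.
static void bzero_like(void *Dst, size_t N) { std::memset(Dst, 0, N); }

// What the lowering amounts to for a zero-valued memset on targets that
// provide such an entry point: call it and ignore any result.
void zeroFill(void *Dst, size_t N) { bzero_like(Dst, N); }

int main() {
  char Buf[32];
  zeroFill(Buf, sizeof Buf);
  return Buf[0]; // 0
}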
@@ -53,7 +55,5 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
}
bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
CodeGenOpt::Level OptLevel) const {
- if (OptLevel >= CodeGenOpt::Aggressive)
- return true;
- return false;
+ return OptLevel >= CodeGenOpt::Aggressive;
}
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 03e01329e036..b3aba4781db8 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -81,8 +81,22 @@ void AArch64Subtarget::initializeProperties() {
MinPrefetchStride = 1024;
MaxPrefetchIterationsAhead = 11;
break;
- case Vulcan:
+ case ThunderX2T99:
+ CacheLineSize = 64;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 4;
+ break;
+ case ThunderX:
+ case ThunderXT88:
+ case ThunderXT81:
+ case ThunderXT83:
+ CacheLineSize = 128;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
break;
case CortexA35: break;
case CortexA53: break;
@@ -133,9 +147,9 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return AArch64II::MO_GOT;
- // The small code mode's direct accesses use ADRP, which cannot necessarily
- // produce the value 0 (if the code is above 4GB).
- if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage())
+ // The small code model's direct accesses use ADRP, which cannot
+ // necessarily produce the value 0 (if the code is above 4GB).
+ if (useSmallAddressing() && GV->hasExternalWeakLinkage())
return AArch64II::MO_GOT;
return AArch64II::MO_NO_FLAG;
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index a99340225082..40ad9185012c 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -45,7 +45,11 @@ public:
ExynosM1,
Falkor,
Kryo,
- Vulcan
+ ThunderX2T99,
+ ThunderX,
+ ThunderXT81,
+ ThunderXT83,
+ ThunderXT88
};
protected:
@@ -61,9 +65,11 @@ protected:
bool HasCRC = false;
bool HasLSE = false;
bool HasRAS = false;
+ bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
bool HasSPE = false;
+ bool HasLSLFast = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -73,6 +79,10 @@ protected:
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
+
+  // NegativeImmediates - rewrite instructions with negative immediates into
+  // their positive-immediate equivalents where possible
+ bool NegativeImmediates = true;
+
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -83,6 +93,8 @@ protected:
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
+ bool HasFuseAES = false;
+ bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
@@ -183,6 +195,7 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
bool hasRAS() const { return HasRAS; }
+ bool hasRDM() const { return HasRDM; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
@@ -195,6 +208,8 @@ public:
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+ bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
@@ -218,6 +233,7 @@ public:
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasSPE() const { return HasSPE; }
+ bool hasLSLFast() const { return HasLSLFast; }
bool isLittleEndian() const { return IsLittle; }
@@ -226,6 +242,7 @@ public:
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
@@ -233,9 +250,17 @@ public:
bool useAA() const override { return UseAA; }
- /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
- /// that still makes it profitable to inline the call.
- unsigned getMaxInlineSizeThreshold() const { return 64; }
+ bool useSmallAddressing() const {
+ switch (TLInfo.getTargetMachine().getCodeModel()) {
+ case CodeModel::Kernel:
+ // Kernel is currently allowed only for Fuchsia targets,
+ // where it is the same as Small for almost all purposes.
+ case CodeModel::Small:
+ return true;
+ default:
+ return false;
+ }
+ }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td
index a3736c0868fb..7c5dcb0853eb 100644
--- a/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/lib/Target/AArch64/AArch64SystemOperands.td
@@ -18,35 +18,37 @@ include "llvm/TableGen/SearchableTable.td"
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
-class AT<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
}
-def : AT<"S1E1R", 0b01, 0b000, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E2R", 0b01, 0b100, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E3R", 0b01, 0b110, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E1W", 0b01, 0b000, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E2W", 0b01, 0b100, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E3W", 0b01, 0b110, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E0R", 0b01, 0b000, 0b0111, 0b1000, 0b010>;
-def : AT<"S1E0W", 0b01, 0b000, 0b0111, 0b1000, 0b011>;
-def : AT<"S12E1R", 0b01, 0b100, 0b0111, 0b1000, 0b100>;
-def : AT<"S12E1W", 0b01, 0b100, 0b0111, 0b1000, 0b101>;
-def : AT<"S12E0R", 0b01, 0b100, 0b0111, 0b1000, 0b110>;
-def : AT<"S12E0W", 0b01, 0b100, 0b0111, 0b1000, 0b111>;
-def : AT<"S1E1RP", 0b01, 0b000, 0b0111, 0b1001, 0b000>;
-def : AT<"S1E1WP", 0b01, 0b000, 0b0111, 0b1001, 0b001>;
-
+def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E1W", 0b000, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E2W", 0b100, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E3W", 0b110, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E0R", 0b000, 0b0111, 0b1000, 0b010>;
+def : AT<"S1E0W", 0b000, 0b0111, 0b1000, 0b011>;
+def : AT<"S12E1R", 0b100, 0b0111, 0b1000, 0b100>;
+def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
+def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
+def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
+def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
+}
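The AT table (and the DC and TLBI tables below) now packs op1/CRn/CRm/op2 into a 14-bit Encoding with the constant op0 field dropped; the asm parser's createSysAlias and the instruction printer later in this patch mask the same fields back out. A small standalone sketch of that layout, using my own helper names rather than anything from LLVM:

#include <cassert>
#include <cstdint>

// Field layout used by the tables above: Encoding{13-11} = op1,
// {10-7} = CRn, {6-3} = CRm, {2-0} = op2.
uint16_t encodeSysAlias(unsigned Op1, unsigned CRn, unsigned CRm,
                        unsigned Op2) {
  return (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

void decodeSysAlias(uint16_t Enc, unsigned &Op1, unsigned &CRn,
                    unsigned &CRm, unsigned &Op2) {
  Op2 = Enc & 7;
  CRm = (Enc & 0x78) >> 3;
  CRn = (Enc & 0x780) >> 7;
  Op1 = (Enc & 0x3800) >> 11;
}

int main() {
  // AT S1E2R from the table: op1=0b100, CRn=0b0111, CRm=0b1000, op2=0b000.
  uint16_t Enc = encodeSysAlias(0b100, 0b0111, 0b1000, 0b000);
  unsigned Op1, CRn, CRm, Op2;
  decodeSysAlias(Enc, Op1, CRn, CRm, Op2);
  assert(Op1 == 4 && CRn == 7 && CRm == 8 && Op2 == 0);
}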
//===----------------------------------------------------------------------===//
// DMB/DSB (data barrier) instruction options.
@@ -77,28 +79,31 @@ def : DB<"sy", 0xf>;
// DC (data cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
-class DC<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
}
-def : DC<"ZVA", 0b01, 0b011, 0b0111, 0b0100, 0b001>;
-def : DC<"IVAC", 0b01, 0b000, 0b0111, 0b0110, 0b001>;
-def : DC<"ISW", 0b01, 0b000, 0b0111, 0b0110, 0b010>;
-def : DC<"CVAC", 0b01, 0b011, 0b0111, 0b1010, 0b001>;
-def : DC<"CSW", 0b01, 0b000, 0b0111, 0b1010, 0b010>;
-def : DC<"CVAU", 0b01, 0b011, 0b0111, 0b1011, 0b001>;
-def : DC<"CIVAC", 0b01, 0b011, 0b0111, 0b1110, 0b001>;
-def : DC<"CISW", 0b01, 0b000, 0b0111, 0b1110, 0b010>;
+def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
+def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
+def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
+def : DC<"CVAC", 0b011, 0b0111, 0b1010, 0b001>;
+def : DC<"CSW", 0b000, 0b0111, 0b1010, 0b010>;
+def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
+def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
+def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
//===----------------------------------------------------------------------===//
// IC (instruction cache maintenance) instruction options.
@@ -120,7 +125,7 @@ class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2,
def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>;
def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>;
-def : IC<"IVAU", 0b000, 0b0111, 0b0001, 0b000, 1>;
+def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>;
//===----------------------------------------------------------------------===//
// ISB (instruction-fetch barrier) instruction options.
@@ -213,14 +218,13 @@ def : PSB<"csync", 0x11>;
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
-class TLBI<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2, bit needsreg = 1> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
@@ -228,38 +232,38 @@ class TLBI<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
bit NeedsReg = needsreg;
}
-def : TLBI<"IPAS2E1IS", 0b01, 0b100, 0b1000, 0b0000, 0b001>;
-def : TLBI<"IPAS2LE1IS", 0b01, 0b100, 0b1000, 0b0000, 0b101>;
-def : TLBI<"VMALLE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"VAE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b001>;
-def : TLBI<"ASIDE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b010>;
-def : TLBI<"VAAE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b011>;
-def : TLBI<"ALLE1IS", 0b01, 0b100, 0b1000, 0b0011, 0b100, 0>;
-def : TLBI<"VALE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VMALLS12E1IS", 0b01, 0b100, 0b1000, 0b0011, 0b110, 0>;
-def : TLBI<"VAALE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b111>;
-def : TLBI<"IPAS2E1", 0b01, 0b100, 0b1000, 0b0100, 0b001>;
-def : TLBI<"IPAS2LE1", 0b01, 0b100, 0b1000, 0b0100, 0b101>;
-def : TLBI<"VMALLE1", 0b01, 0b000, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE2", 0b01, 0b100, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE3", 0b01, 0b110, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"VAE1", 0b01, 0b000, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE2", 0b01, 0b100, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE3", 0b01, 0b110, 0b1000, 0b0111, 0b001>;
-def : TLBI<"ASIDE1", 0b01, 0b000, 0b1000, 0b0111, 0b010>;
-def : TLBI<"VAAE1", 0b01, 0b000, 0b1000, 0b0111, 0b011>;
-def : TLBI<"ALLE1", 0b01, 0b100, 0b1000, 0b0111, 0b100, 0>;
-def : TLBI<"VALE1", 0b01, 0b000, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE2", 0b01, 0b100, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE3", 0b01, 0b110, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VMALLS12E1", 0b01, 0b100, 0b1000, 0b0111, 0b110, 0>;
-def : TLBI<"VAALE1", 0b01, 0b000, 0b1000, 0b0111, 0b111>;
+def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
+def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
+def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
+def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
+def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
+def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
+def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
+def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
+def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
+def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
+def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
+def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
+def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
+def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
+def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d2883941e2c4..dcc51bf02329 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,9 +12,11 @@
#include "AArch64.h"
#include "AArch64CallLowering.h"
-#include "AArch64InstructionSelector.h"
#include "AArch64LegalizerInfo.h"
+#include "AArch64MacroFusion.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
#include "AArch64RegisterBankInfo.h"
+#endif
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
@@ -115,7 +117,7 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
static cl::opt<bool>
EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
cl::desc("Enable the type promotion pass"),
- cl::init(true));
+ cl::init(false));
static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
@@ -136,6 +138,11 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));
+static cl::opt<int> EnableGlobalISelAtO(
+ "aarch64-enable-global-isel-at-O", cl::Hidden,
+ cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
+ cl::init(-1));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -278,7 +285,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// FIXME: At this point, we can't rely on Subtarget having RBI.
// It's awkward to mix passing RBI and the Subtarget; should we pass
// TII/TRI as well?
- GISel->InstSelector.reset(new AArch64InstructionSelector(*this, *I, *RBI));
+ GISel->InstSelector.reset(
+ createAArch64InstructionSelector(*this, *I, *RBI));
GISel->RegBankInfo.reset(RBI);
#endif
@@ -323,10 +331,24 @@ public:
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
return DAG;
}
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+ if (ST.hasFuseLiterals()) {
+ // Run the Macro Fusion after RA again since literals are expanded from
+ // pseudos then (v. addPreSched2()).
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
+ return DAG;
+ }
+
+ return nullptr;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
@@ -341,6 +363,8 @@ public:
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+
+ bool isGlobalISelEnabled() const override;
};
} // end anonymous namespace
@@ -450,6 +474,10 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
}
#endif
+bool AArch64PassConfig::isGlobalISelEnabled() const {
+ return TM->getOptLevel() <= EnableGlobalISelAtO;
+}
+
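isGlobalISelEnabled compares the current optimization level against the new aarch64-enable-global-isel-at-O threshold, so the default of -1 keeps GlobalISel off at every level unless the flag is raised. A tiny model of that gating (plain C++, not LLVM code; the names mirror the option above):

#include <iostream>

int main() {
  int EnableGlobalISelAtO = -1; // default from the cl::opt above
  for (int OptLevel = 0; OptLevel <= 3; ++OptLevel)
    std::cout << "O" << OptLevel << ": "
              << (OptLevel <= EnableGlobalISelAtO ? "GlobalISel"
                                                  : "SelectionDAG")
              << '\n';
}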
bool AArch64PassConfig::addILPOpts() {
if (EnableCondOpt)
addPass(createAArch64ConditionOptimizerPass());
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 6fa5e83957e1..2c75a3258c1c 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -21,6 +21,8 @@
namespace llvm {
+class AArch64RegisterBankInfo;
+
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b8833e5a5552..4d59da0c646d 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -176,7 +176,8 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -436,7 +437,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
}
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+ Type *CondTy, const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower some vector selects well that are wider than the register
@@ -463,11 +464,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
- unsigned Alignment, unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ const Instruction *I) {
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
@@ -505,12 +507,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
- Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
// ldN/stN only support legal vector types of size 64 or 128 in bits.
- if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
- return Factor;
+ // Accesses having vector types that are a multiple of 128 bits can be
+ // matched to more than one ldN/stN instruction.
+ if (NumElts % Factor == 0 &&
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
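With this change the ldN/stN cost scales with how many 128-bit chunks the sub-vector needs instead of being capped at the factor alone. A rough standalone model, assuming (as the comment suggests) that a sub-vector whose size is a multiple of 128 bits is legal and each 128-bit chunk maps to one ldN/stN; 64-bit D-form groups presumably stay legal as before but are not modeled here:

#include <iostream>

// interleavedCost is my own illustration, not an LLVM helper.
int interleavedCost(int NumElts, int EltBits, int Factor) {
  if (NumElts % Factor != 0)
    return -1;                                  // not a clean group
  int SubVecBits = (NumElts / Factor) * EltBits; // one member vector
  if (SubVecBits % 128 != 0)
    return -1;                                  // assumed legality rule
  return Factor * (SubVecBits / 128);           // one ldN/stN per 128-bit chunk
}

int main() {
  std::cout << interleavedCost(16, 32, 4) << '\n'; // <16 x i32>, one ld4 -> 4
  std::cout << interleavedCost(32, 32, 4) << '\n'; // <32 x i32>, two ld4 -> 8
}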
@@ -594,8 +598,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_ld4:
Info.ReadMem = true;
Info.WriteMem = false;
- Info.IsSimple = true;
- Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(0);
break;
case Intrinsic::aarch64_neon_st2:
@@ -603,8 +605,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_st4:
Info.ReadMem = false;
Info.WriteMem = true;
- Info.IsSimple = true;
- Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
break;
}
@@ -628,6 +628,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return true;
}
+/// See if \p I should be considered for address type promotion. We check if
+/// \p I is a sext with the right type and used in memory accesses. If it is
+/// used in a "complex" getelementptr, we allow it to be promoted without
+/// finding other sext instructions that sign extended the same initial value.
+/// A getelementptr is considered "complex" if it has more than 2 operands.
+bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+ bool Considerable = false;
+ AllowPromotionWithoutCommonHeader = false;
+ if (!isa<SExtInst>(&I))
+ return false;
+ Type *ConsideredSExtType =
+ Type::getInt64Ty(I.getParent()->getParent()->getContext());
+ if (I.getType() != ConsideredSExtType)
+ return false;
+ // See if the sext is the one with the right type and used in at least one
+ // GetElementPtrInst.
+ for (const User *U : I.users()) {
+ if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+ Considerable = true;
+      // A getelementptr is considered "complex" if it has more than 2
+      // operands. We will promote a SExt used in such a complex GEP, as we
+      // expect some of the computation to be merged if it is done on 64 bits.
+ if (GEPInst->getNumOperands() > 2) {
+ AllowPromotionWithoutCommonHeader = true;
+ break;
+ }
+ }
+ }
+ return Considerable;
+}
+
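For context, a hedged illustration (not taken from the patch) of the shape this hook targets: a 32-bit induction variable whose sext feeds a multi-operand GEP. Roughly, a[i][j] below becomes a getelementptr with two index operands (more than 2 operands in total), so the sext of i or j may be promoted without a common header. The exact IR depends on the frontend, so treat the shape as approximate.

// Plain C++ used only to show the source pattern.
long sumRows(int (*a)[100], int n) {
  long s = 0;
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      s += a[i][j]; // i and j are sign-extended to 64 bits for addressing
  return s;
}

int main() {
  static int grid[100][100] = {};
  return static_cast<int>(sumRows(grid, 100)); // 0
}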
unsigned AArch64TTIImpl::getCacheLineSize() {
return ST->getCacheLineSize();
}
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 18287ed6653f..e37c003e064c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -34,10 +34,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const AArch64Subtarget *getST() const { return ST; }
const AArch64TargetLowering *getTLI() const { return TLI; }
@@ -90,7 +86,8 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index);
@@ -107,10 +104,11 @@ public:
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -125,6 +123,10 @@ public:
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
+ bool
+ shouldConsiderAddressTypePromotion(const Instruction &I,
+ bool &AllowPromotionWithoutCommonHeader);
+
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
diff --git a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
index e3b1d7cea48d..f53af2315ec9 100644
--- a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
+++ b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
@@ -19,13 +19,27 @@
// is rewritten into
// dup v3.4s, v2.s[1]
// fmla v0.4s, v1.4s, v3.4s
+//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
using namespace llvm;
@@ -41,14 +55,15 @@ namespace {
struct AArch64VectorByElementOpt : public MachineFunctionPass {
static char ID;
- AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
- initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
- }
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
+ AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
+ initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
+ }
+
/// Based only on latency of instructions, determine if it is cost efficient
/// to replace the instruction InstDesc by the two instructions InstDescRep1
/// and InstDescRep2.
@@ -90,8 +105,10 @@ struct AArch64VectorByElementOpt : public MachineFunctionPass {
return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME;
}
};
+
char AArch64VectorByElementOpt::ID = 0;
-} // namespace
+
+} // end anonymous namespace
INITIALIZE_PASS(AArch64VectorByElementOpt, "aarch64-vectorbyelement-opt",
AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false)
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index b86a283b40d4..cbab68979c56 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -74,6 +74,7 @@ private:
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
+ void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
@@ -537,154 +538,15 @@ public:
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
}
- bool isImm0_1() const {
+ template <int N, int M>
+ bool isImmInRange() const {
if (!isImm())
return false;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 2);
- }
-
- bool isImm0_7() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 8);
- }
-
- bool isImm1_8() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 9);
- }
-
- bool isImm0_15() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 16);
- }
-
- bool isImm1_16() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 17);
- }
-
- bool isImm0_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 32);
- }
-
- bool isImm1_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 32);
- }
-
- bool isImm1_32() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 33);
- }
-
- bool isImm0_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 64);
- }
-
- bool isImm1_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 64);
- }
-
- bool isImm1_64() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 65);
- }
-
- bool isImm0_127() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 128);
- }
-
- bool isImm0_255() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 256);
- }
-
- bool isImm0_65535() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 65536);
- }
-
- bool isImm32_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 32 && Val < 64);
+ return (Val >= N && Val <= M);
}
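The single isImmInRange<N, M> template replaces the dozen near-identical isImm0_1 ... isImm32_63 predicates with one inclusive [N, M] check, so the old isImm0_31, for example, presumably becomes isImmInRange<0, 31>. A standalone mirror of the check:

#include <cassert>
#include <cstdint>

// Same bounds convention as above: both endpoints are included.
template <int N, int M> bool isImmInRange(int64_t Val) {
  return Val >= N && Val <= M;
}

int main() {
  assert((isImmInRange<0, 31>(31)));  // old isImm0_31 accepted [0, 31]
  assert((!isImmInRange<1, 64>(0)));  // old isImm1_64 rejected 0
  assert((isImmInRange<32, 63>(40))); // old isImm32_63
}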
bool isLogicalImm32() const {
@@ -804,31 +666,8 @@ public:
return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
}
- bool isBranchTarget26() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
- }
-
- bool isPCRelLabel19() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
- }
-
- bool isBranchTarget14() const {
+ template<int N>
+ bool isBranchTarget() const {
if (!isImm())
return false;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
@@ -837,7 +676,8 @@ public:
int64_t Val = MCE->getValue();
if (Val & 0x3)
return false;
- return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2));
+ assert(N > 0 && "Branch target immediate cannot be 0 bits!");
+ return (Val >= -((1<<(N-1)) << 2) && Val <= (((1<<(N-1))-1) << 2));
}
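The templated isBranchTarget<N> folds the old 14-, 19- and 26-bit variants into one check: the offset must be a multiple of 4 and, divided by 4, must fit in a signed N-bit field, i.e. lie in [-(2^(N-1))*4, (2^(N-1)-1)*4]. A standalone mirror with the three widths exercised (the instruction mapping given in the comments is the usual AArch64 one, stated here only for orientation):

#include <cassert>
#include <cstdint>

// Word-scaled, N-bit signed PC-relative range, as in the template above.
template <int N> bool isBranchTarget(int64_t Val) {
  if (Val & 0x3)
    return false;
  return Val >= -((int64_t(1) << (N - 1)) << 2) &&
         Val <= (((int64_t(1) << (N - 1)) - 1) << 2);
}

int main() {
  assert(isBranchTarget<14>(32764));     // top of 14-bit range (TBZ/TBNZ)
  assert(isBranchTarget<19>(-1048576));  // bottom of 19-bit range (B.cond/CBZ)
  assert(isBranchTarget<26>(134217724)); // top of 26-bit range (B/BL)
  assert(!isBranchTarget<26>(2));        // not 4-byte aligned
  assert(!isBranchTarget<14>(32768));    // out of range
}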
bool
@@ -2494,6 +2334,35 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_Success;
}
+static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
+ if (FBS[AArch64::HasV8_1aOps])
+ Str += "ARMv8.1a";
+ else if (FBS[AArch64::HasV8_2aOps])
+ Str += "ARMv8.2a";
+ else
+ Str += "(unknown)";
+}
+
+void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands,
+ SMLoc S) {
+ const uint16_t Op2 = Encoding & 7;
+ const uint16_t Cm = (Encoding & 0x78) >> 3;
+ const uint16_t Cn = (Encoding & 0x780) >> 7;
+ const uint16_t Op1 = (Encoding & 0x3800) >> 11;
+
+ const MCExpr *Expr = MCConstantExpr::create(Op1, getContext());
+
+ Operands.push_back(
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext()));
+ Expr = MCConstantExpr::create(Op2, getContext());
+ Operands.push_back(
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
+}
+
/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
@@ -2510,228 +2379,48 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
- const MCExpr *Expr = nullptr;
-
-#define SYS_ALIAS(op1, Cn, Cm, op2) \
- do { \
- Expr = MCConstantExpr::create(op1, getContext()); \
- Operands.push_back( \
- AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- Operands.push_back( \
- AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \
- Operands.push_back( \
- AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \
- Expr = MCConstantExpr::create(op2, getContext()); \
- Operands.push_back( \
- AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- } while (false)
-
if (Mnemonic == "ic") {
- if (!Op.compare_lower("ialluis")) {
- // SYS #0, C7, C1, #0
- SYS_ALIAS(0, 7, 1, 0);
- } else if (!Op.compare_lower("iallu")) {
- // SYS #0, C7, C5, #0
- SYS_ALIAS(0, 7, 5, 0);
- } else if (!Op.compare_lower("ivau")) {
- // SYS #3, C7, C5, #1
- SYS_ALIAS(3, 7, 5, 1);
- } else {
+ const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
+ if (!IC)
return TokError("invalid operand for IC instruction");
+ else if (!IC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("IC " + std::string(IC->Name) + " requires ");
+ setRequiredFeatureString(IC->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(IC->Encoding, Operands, S);
} else if (Mnemonic == "dc") {
- if (!Op.compare_lower("zva")) {
- // SYS #3, C7, C4, #1
- SYS_ALIAS(3, 7, 4, 1);
- } else if (!Op.compare_lower("ivac")) {
- // SYS #3, C7, C6, #1
- SYS_ALIAS(0, 7, 6, 1);
- } else if (!Op.compare_lower("isw")) {
- // SYS #0, C7, C6, #2
- SYS_ALIAS(0, 7, 6, 2);
- } else if (!Op.compare_lower("cvac")) {
- // SYS #3, C7, C10, #1
- SYS_ALIAS(3, 7, 10, 1);
- } else if (!Op.compare_lower("csw")) {
- // SYS #0, C7, C10, #2
- SYS_ALIAS(0, 7, 10, 2);
- } else if (!Op.compare_lower("cvau")) {
- // SYS #3, C7, C11, #1
- SYS_ALIAS(3, 7, 11, 1);
- } else if (!Op.compare_lower("civac")) {
- // SYS #3, C7, C14, #1
- SYS_ALIAS(3, 7, 14, 1);
- } else if (!Op.compare_lower("cisw")) {
- // SYS #0, C7, C14, #2
- SYS_ALIAS(0, 7, 14, 2);
- } else if (!Op.compare_lower("cvap")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #3, C7, C12, #1
- SYS_ALIAS(3, 7, 12, 1);
- } else {
- return TokError("DC CVAP requires ARMv8.2a");
- }
- } else {
+ const AArch64DC::DC *DC = AArch64DC::lookupDCByName(Op);
+ if (!DC)
return TokError("invalid operand for DC instruction");
+ else if (!DC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("DC " + std::string(DC->Name) + " requires ");
+ setRequiredFeatureString(DC->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(DC->Encoding, Operands, S);
} else if (Mnemonic == "at") {
- if (!Op.compare_lower("s1e1r")) {
- // SYS #0, C7, C8, #0
- SYS_ALIAS(0, 7, 8, 0);
- } else if (!Op.compare_lower("s1e2r")) {
- // SYS #4, C7, C8, #0
- SYS_ALIAS(4, 7, 8, 0);
- } else if (!Op.compare_lower("s1e3r")) {
- // SYS #6, C7, C8, #0
- SYS_ALIAS(6, 7, 8, 0);
- } else if (!Op.compare_lower("s1e1w")) {
- // SYS #0, C7, C8, #1
- SYS_ALIAS(0, 7, 8, 1);
- } else if (!Op.compare_lower("s1e2w")) {
- // SYS #4, C7, C8, #1
- SYS_ALIAS(4, 7, 8, 1);
- } else if (!Op.compare_lower("s1e3w")) {
- // SYS #6, C7, C8, #1
- SYS_ALIAS(6, 7, 8, 1);
- } else if (!Op.compare_lower("s1e0r")) {
- // SYS #0, C7, C8, #3
- SYS_ALIAS(0, 7, 8, 2);
- } else if (!Op.compare_lower("s1e0w")) {
- // SYS #0, C7, C8, #3
- SYS_ALIAS(0, 7, 8, 3);
- } else if (!Op.compare_lower("s12e1r")) {
- // SYS #4, C7, C8, #4
- SYS_ALIAS(4, 7, 8, 4);
- } else if (!Op.compare_lower("s12e1w")) {
- // SYS #4, C7, C8, #5
- SYS_ALIAS(4, 7, 8, 5);
- } else if (!Op.compare_lower("s12e0r")) {
- // SYS #4, C7, C8, #6
- SYS_ALIAS(4, 7, 8, 6);
- } else if (!Op.compare_lower("s12e0w")) {
- // SYS #4, C7, C8, #7
- SYS_ALIAS(4, 7, 8, 7);
- } else if (!Op.compare_lower("s1e1rp")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #0, C7, C9, #0
- SYS_ALIAS(0, 7, 9, 0);
- } else {
- return TokError("AT S1E1RP requires ARMv8.2a");
- }
- } else if (!Op.compare_lower("s1e1wp")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #0, C7, C9, #1
- SYS_ALIAS(0, 7, 9, 1);
- } else {
- return TokError("AT S1E1WP requires ARMv8.2a");
- }
- } else {
+ const AArch64AT::AT *AT = AArch64AT::lookupATByName(Op);
+ if (!AT)
return TokError("invalid operand for AT instruction");
+ else if (!AT->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("AT " + std::string(AT->Name) + " requires ");
+ setRequiredFeatureString(AT->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(AT->Encoding, Operands, S);
} else if (Mnemonic == "tlbi") {
- if (!Op.compare_lower("vmalle1is")) {
- // SYS #0, C8, C3, #0
- SYS_ALIAS(0, 8, 3, 0);
- } else if (!Op.compare_lower("alle2is")) {
- // SYS #4, C8, C3, #0
- SYS_ALIAS(4, 8, 3, 0);
- } else if (!Op.compare_lower("alle3is")) {
- // SYS #6, C8, C3, #0
- SYS_ALIAS(6, 8, 3, 0);
- } else if (!Op.compare_lower("vae1is")) {
- // SYS #0, C8, C3, #1
- SYS_ALIAS(0, 8, 3, 1);
- } else if (!Op.compare_lower("vae2is")) {
- // SYS #4, C8, C3, #1
- SYS_ALIAS(4, 8, 3, 1);
- } else if (!Op.compare_lower("vae3is")) {
- // SYS #6, C8, C3, #1
- SYS_ALIAS(6, 8, 3, 1);
- } else if (!Op.compare_lower("aside1is")) {
- // SYS #0, C8, C3, #2
- SYS_ALIAS(0, 8, 3, 2);
- } else if (!Op.compare_lower("vaae1is")) {
- // SYS #0, C8, C3, #3
- SYS_ALIAS(0, 8, 3, 3);
- } else if (!Op.compare_lower("alle1is")) {
- // SYS #4, C8, C3, #4
- SYS_ALIAS(4, 8, 3, 4);
- } else if (!Op.compare_lower("vale1is")) {
- // SYS #0, C8, C3, #5
- SYS_ALIAS(0, 8, 3, 5);
- } else if (!Op.compare_lower("vaale1is")) {
- // SYS #0, C8, C3, #7
- SYS_ALIAS(0, 8, 3, 7);
- } else if (!Op.compare_lower("vmalle1")) {
- // SYS #0, C8, C7, #0
- SYS_ALIAS(0, 8, 7, 0);
- } else if (!Op.compare_lower("alle2")) {
- // SYS #4, C8, C7, #0
- SYS_ALIAS(4, 8, 7, 0);
- } else if (!Op.compare_lower("vale2is")) {
- // SYS #4, C8, C3, #5
- SYS_ALIAS(4, 8, 3, 5);
- } else if (!Op.compare_lower("vale3is")) {
- // SYS #6, C8, C3, #5
- SYS_ALIAS(6, 8, 3, 5);
- } else if (!Op.compare_lower("alle3")) {
- // SYS #6, C8, C7, #0
- SYS_ALIAS(6, 8, 7, 0);
- } else if (!Op.compare_lower("vae1")) {
- // SYS #0, C8, C7, #1
- SYS_ALIAS(0, 8, 7, 1);
- } else if (!Op.compare_lower("vae2")) {
- // SYS #4, C8, C7, #1
- SYS_ALIAS(4, 8, 7, 1);
- } else if (!Op.compare_lower("vae3")) {
- // SYS #6, C8, C7, #1
- SYS_ALIAS(6, 8, 7, 1);
- } else if (!Op.compare_lower("aside1")) {
- // SYS #0, C8, C7, #2
- SYS_ALIAS(0, 8, 7, 2);
- } else if (!Op.compare_lower("vaae1")) {
- // SYS #0, C8, C7, #3
- SYS_ALIAS(0, 8, 7, 3);
- } else if (!Op.compare_lower("alle1")) {
- // SYS #4, C8, C7, #4
- SYS_ALIAS(4, 8, 7, 4);
- } else if (!Op.compare_lower("vale1")) {
- // SYS #0, C8, C7, #5
- SYS_ALIAS(0, 8, 7, 5);
- } else if (!Op.compare_lower("vale2")) {
- // SYS #4, C8, C7, #5
- SYS_ALIAS(4, 8, 7, 5);
- } else if (!Op.compare_lower("vale3")) {
- // SYS #6, C8, C7, #5
- SYS_ALIAS(6, 8, 7, 5);
- } else if (!Op.compare_lower("vaale1")) {
- // SYS #0, C8, C7, #7
- SYS_ALIAS(0, 8, 7, 7);
- } else if (!Op.compare_lower("ipas2e1")) {
- // SYS #4, C8, C4, #1
- SYS_ALIAS(4, 8, 4, 1);
- } else if (!Op.compare_lower("ipas2le1")) {
- // SYS #4, C8, C4, #5
- SYS_ALIAS(4, 8, 4, 5);
- } else if (!Op.compare_lower("ipas2e1is")) {
- // SYS #4, C8, C4, #1
- SYS_ALIAS(4, 8, 0, 1);
- } else if (!Op.compare_lower("ipas2le1is")) {
- // SYS #4, C8, C4, #5
- SYS_ALIAS(4, 8, 0, 5);
- } else if (!Op.compare_lower("vmalls12e1")) {
- // SYS #4, C8, C7, #6
- SYS_ALIAS(4, 8, 7, 6);
- } else if (!Op.compare_lower("vmalls12e1is")) {
- // SYS #4, C8, C3, #6
- SYS_ALIAS(4, 8, 3, 6);
- } else {
+ const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByName(Op);
+ if (!TLBI)
return TokError("invalid operand for TLBI instruction");
+ else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("TLBI " + std::string(TLBI->Name) + " requires ");
+ setRequiredFeatureString(TLBI->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(TLBI->Encoding, Operands, S);
}
-#undef SYS_ALIAS
-
Parser.Lex(); // Eat operand.
bool ExpectRegister = (Op.lower().find("all") == StringRef::npos);
@@ -2744,12 +2433,10 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
HasRegister = true;
}
- if (ExpectRegister && !HasRegister) {
+ if (ExpectRegister && !HasRegister)
return TokError("specified " + Mnemonic + " op requires a register");
- }
- else if (!ExpectRegister && HasRegister) {
+ else if (!ExpectRegister && HasRegister)
return TokError("specified " + Mnemonic + " op does not use a register");
- }
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
return true;
@@ -2884,7 +2571,6 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
/// parseRegister - Parse a non-vector register operand.
bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
// Try for a vector register.
if (!tryParseVectorRegister(Operands))
@@ -2897,30 +2583,6 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
- // A small number of instructions (FMOVXDhighr, for example) have "[1]"
- // as a string token in the instruction itself.
- SMLoc LBracS = getLoc();
- const AsmToken &Tok = Parser.getTok();
- if (parseOptionalToken(AsmToken::LBrac)) {
- if (Tok.is(AsmToken::Integer)) {
- SMLoc IntS = getLoc();
- int64_t Val = Tok.getIntVal();
- if (Val == 1) {
- Parser.Lex();
- SMLoc RBracS = getLoc();
- if (parseOptionalToken(AsmToken::RBrac)) {
- Operands.push_back(
- AArch64Operand::CreateToken("[", false, LBracS, getContext()));
- Operands.push_back(
- AArch64Operand::CreateToken("1", false, IntS, getContext()));
- Operands.push_back(
- AArch64Operand::CreateToken("]", false, RBracS, getContext()));
- return false;
- }
- }
- }
- }
-
return false;
}
@@ -3696,6 +3358,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
return Error(Loc, "immediate must be an integer in range [0, 63].");
case Match_InvalidImm0_127:
return Error(Loc, "immediate must be an integer in range [0, 127].");
+ case Match_InvalidImm0_255:
+ return Error(Loc, "immediate must be an integer in range [0, 255].");
case Match_InvalidImm0_65535:
return Error(Loc, "immediate must be an integer in range [0, 65535].");
case Match_InvalidImm1_8:
@@ -4120,6 +3784,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidImm0_31:
case Match_InvalidImm0_63:
case Match_InvalidImm0_127:
+ case Match_InvalidImm0_255:
case Match_InvalidImm0_65535:
case Match_InvalidImm1_8:
case Match_InvalidImm1_16:
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index 6bcf67fb3fef..6d0930c358f1 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -14,6 +14,7 @@ tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AArch64GenSystemOperands.inc -gen-searchable-tables)
if(LLVM_BUILD_GLOBAL_ISEL)
+ tablegen(LLVM AArch64GenRegisterBank.inc -gen-register-bank)
tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
endif()
@@ -55,6 +56,7 @@ add_llvm_target(AArch64CodeGen
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
AArch64LoadStoreOptimizer.cpp
+ AArch64MacroFusion.cpp
AArch64MCInstLower.cpp
AArch64PromoteConstant.cpp
AArch64PBQPRegAlloc.cpp
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index b4f85204714f..41ae70f85e58 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -16,12 +16,20 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -451,8 +459,8 @@ static const LdStNInstrDesc LdStNInstInfo[] = {
{ AArch64::LD3i64, "ld3", ".d", 1, true, 0 },
{ AArch64::LD3i8_POST, "ld3", ".b", 2, true, 3 },
{ AArch64::LD3i16_POST, "ld3", ".h", 2, true, 6 },
- { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 },
- { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 },
+ { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 },
+ { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 },
{ AArch64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 },
{ AArch64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 },
{ AArch64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 },
@@ -731,7 +739,6 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
#endif
- const char *Asm = nullptr;
const MCOperand &Op1 = MI->getOperand(0);
const MCOperand &Cn = MI->getOperand(1);
const MCOperand &Cm = MI->getOperand(2);
@@ -742,230 +749,74 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
unsigned CmVal = Cm.getImm();
unsigned Op2Val = Op2.getImm();
+ uint16_t Encoding = Op2Val;
+ Encoding |= CmVal << 3;
+ Encoding |= CnVal << 7;
+ Encoding |= Op1Val << 11;
+
+ bool NeedsReg;
+ std::string Ins;
+ std::string Name;
+
if (CnVal == 7) {
switch (CmVal) {
- default:
- break;
-
+ default: return false;
// IC aliases
- case 1:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tialluis";
- break;
- case 5:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tiallu";
- else if (Op1Val == 3 && Op2Val == 1)
- Asm = "ic\tivau";
- break;
-
+ case 1: case 5: {
+ const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
+ if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = IC->NeedsReg;
+ Ins = "ic\t";
+ Name = std::string(IC->Name);
+ }
+ break;
// DC aliases
- case 4:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tzva";
- break;
- case 6:
- if (Op1Val == 0 && Op2Val == 1)
- Asm = "dc\tivac";
- if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tisw";
- break;
- case 10:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcsw";
- break;
- case 11:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvau";
- break;
- case 12:
- if (Op1Val == 3 && Op2Val == 1 &&
- (STI.getFeatureBits()[AArch64::HasV8_2aOps]))
- Asm = "dc\tcvap";
- break;
- case 14:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcivac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcisw";
- break;
-
+ case 4: case 6: case 10: case 11: case 12: case 14:
+ {
+ const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
+ if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "dc\t";
+ Name = std::string(DC->Name);
+ }
+ break;
// AT aliases
- case 8:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e1r"; break;
- case 1: Asm = "at\ts1e1w"; break;
- case 2: Asm = "at\ts1e0r"; break;
- case 3: Asm = "at\ts1e0w"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e2r"; break;
- case 1: Asm = "at\ts1e2w"; break;
- case 4: Asm = "at\ts12e1r"; break;
- case 5: Asm = "at\ts12e1w"; break;
- case 6: Asm = "at\ts12e0r"; break;
- case 7: Asm = "at\ts12e0w"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e3r"; break;
- case 1: Asm = "at\ts1e3w"; break;
- }
- break;
- }
- break;
- case 9:
- switch (Op1Val) {
- default:
- break;
- case 0:
- if (STI.getFeatureBits()[AArch64::HasV8_2aOps]) {
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e1rp"; break;
- case 1: Asm = "at\ts1e1wp"; break;
- }
- }
- break;
- }
+ case 8: case 9: {
+ const AArch64AT::AT *AT = AArch64AT::lookupATByEncoding(Encoding);
+ if (!AT || !AT->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "at\t";
+ Name = std::string(AT->Name);
+ }
+ break;
}
} else if (CnVal == 8) {
// TLBI aliases
- switch (CmVal) {
- default:
- break;
- case 3:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1is"; break;
- case 1: Asm = "tlbi\tvae1is"; break;
- case 2: Asm = "tlbi\taside1is"; break;
- case 3: Asm = "tlbi\tvaae1is"; break;
- case 5: Asm = "tlbi\tvale1is"; break;
- case 7: Asm = "tlbi\tvaale1is"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2is"; break;
- case 1: Asm = "tlbi\tvae2is"; break;
- case 4: Asm = "tlbi\talle1is"; break;
- case 5: Asm = "tlbi\tvale2is"; break;
- case 6: Asm = "tlbi\tvmalls12e1is"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3is"; break;
- case 1: Asm = "tlbi\tvae3is"; break;
- case 5: Asm = "tlbi\tvale3is"; break;
- }
- break;
- }
- break;
- case 0:
- switch (Op1Val) {
- default:
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 1: Asm = "tlbi\tipas2e1is"; break;
- case 5: Asm = "tlbi\tipas2le1is"; break;
- }
- break;
- }
- break;
- case 4:
- switch (Op1Val) {
- default:
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 1: Asm = "tlbi\tipas2e1"; break;
- case 5: Asm = "tlbi\tipas2le1"; break;
- }
- break;
- }
- break;
- case 7:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1"; break;
- case 1: Asm = "tlbi\tvae1"; break;
- case 2: Asm = "tlbi\taside1"; break;
- case 3: Asm = "tlbi\tvaae1"; break;
- case 5: Asm = "tlbi\tvale1"; break;
- case 7: Asm = "tlbi\tvaale1"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2"; break;
- case 1: Asm = "tlbi\tvae2"; break;
- case 4: Asm = "tlbi\talle1"; break;
- case 5: Asm = "tlbi\tvale2"; break;
- case 6: Asm = "tlbi\tvmalls12e1"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3"; break;
- case 1: Asm = "tlbi\tvae3"; break;
- case 5: Asm = "tlbi\tvale3"; break;
- }
- break;
- }
- break;
- }
+ const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding);
+ if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = TLBI->NeedsReg;
+ Ins = "tlbi\t";
+ Name = std::string(TLBI->Name);
}
+ else
+ return false;
- if (Asm) {
- unsigned Reg = MI->getOperand(4).getReg();
+ std::string Str = Ins + Name;
+ std::transform(Str.begin(), Str.end(), Str.begin(), ::tolower);
- O << '\t' << Asm;
- if (StringRef(Asm).lower().find("all") == StringRef::npos)
- O << ", " << getRegisterName(Reg);
- }
+ O << '\t' << Str;
+ if (NeedsReg)
+ O << ", " << getRegisterName(MI->getOperand(4).getReg());
- return Asm != nullptr;
+ return true;
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
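The rewritten printSysAlias above keys every IC/DC/AT/TLBI alias off one packed encoding and the tables generated from AArch64SystemOperands.td (lookupICByEncoding and friends). Below is a minimal standalone sketch of that packing, assuming only the bit layout visible in the hunk (op2 | CRm<<3 | CRn<<7 | op1<<11); the helper name and the sample instruction values are illustrative, not part of the patch.

    // Illustrative sketch (not from the patch): packing SYS operands into the
    // 14-bit encoding the new table-driven printer uses as its lookup key.
    #include <cstdint>
    #include <cstdio>

    static uint16_t packSysEncoding(unsigned Op1, unsigned CRn, unsigned CRm,
                                    unsigned Op2) {
      // Matches the layout built in printSysAlias: op2 | CRm<<3 | CRn<<7 | op1<<11.
      return static_cast<uint16_t>(Op2 | (CRm << 3) | (CRn << 7) | (Op1 << 11));
    }

    int main() {
      // "ic ivau" is SYS #3, C7, C5, #1 -> op1=3, CRn=7, CRm=5, op2=1.
      std::printf("ic ivau encoding: 0x%04x\n",
                  (unsigned)packSysEncoding(3, 7, 5, 1));
      return 0;
    }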
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 65dca99ed04e..a45258cb97b7 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -37,9 +38,11 @@ public:
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O);
+
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
+
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
@@ -177,12 +180,15 @@ public:
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O) override;
+
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
}
+
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 14c0327f5fa8..ebf05ae303dd 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -73,7 +73,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -138,15 +138,15 @@ static unsigned AdrImmBits(unsigned Value) {
}
static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext *Ctx) {
+ MCContext &Ctx) {
unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_pcrel_adr_imm21:
- if (Ctx && (SignedValue > 2097151 || SignedValue < -2097152))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 2097151 || SignedValue < -2097152)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
@@ -154,66 +154,65 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case AArch64::fixup_aarch64_pcrel_branch19:
// Signed 21-bit immediate
if (SignedValue > 2097151 || SignedValue < -2097152)
- if (Ctx) Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
// Low two bits are not encoded.
return (Value >> 2) & 0x7ffff;
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
// Unsigned 12-bit immediate
- if (Ctx && Value >= 0x1000)
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value >= 0x1000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return Value;
case AArch64::fixup_aarch64_ldst_imm12_scale2:
// Unsigned 12-bit immediate which gets multiplied by 2
- if (Ctx && (Value >= 0x2000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x1))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
+ if (Value >= 0x2000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x1)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
return Value >> 1;
case AArch64::fixup_aarch64_ldst_imm12_scale4:
// Unsigned 12-bit immediate which gets multiplied by 4
- if (Ctx && (Value >= 0x4000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
+ if (Value >= 0x4000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
return Value >> 2;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
// Unsigned 12-bit immediate which gets multiplied by 8
- if (Ctx && (Value >= 0x8000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x7))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
+ if (Value >= 0x8000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x7)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
return Value >> 3;
case AArch64::fixup_aarch64_ldst_imm12_scale16:
// Unsigned 12-bit immediate which gets multiplied by 16
- if (Ctx && (Value >= 0x10000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0xf))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
+ if (Value >= 0x10000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0xf)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
return Value >> 4;
case AArch64::fixup_aarch64_movw:
- if (Ctx)
- Ctx->reportError(Fixup.getLoc(),
- "no resolvable MOVZ/MOVK fixups supported yet");
+ Ctx.reportError(Fixup.getLoc(),
+ "no resolvable MOVZ/MOVK fixups supported yet");
return Value;
case AArch64::fixup_aarch64_pcrel_branch14:
// Signed 16-bit immediate
- if (Ctx && (SignedValue > 32767 || SignedValue < -32768))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 32767 || SignedValue < -32768)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3fff;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
// Signed 28-bit immediate
- if (Ctx && (SignedValue > 134217727 || SignedValue < -134217728))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 134217727 || SignedValue < -134217728)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3ffffff;
case FK_Data_1:
case FK_Data_2:
@@ -264,13 +263,13 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con
void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
if (!Value)
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
// Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup, Value, nullptr);
+ Value = adjustFixupValue(Fixup, Value, Ctx);
// Shift the value into position.
Value <<= Info.TargetOffset;
@@ -521,17 +520,6 @@ public:
return CompactUnwindEncoding;
}
-
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override {
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value is invalid.
- if (IsResolved)
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
- }
};
} // end anonymous namespace
@@ -575,12 +563,6 @@ void ELFAArch64AsmBackend::processFixupValue(
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
IsResolved = false;
-
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value is invalid.
- if (IsResolved)
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
}
}
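The AsmBackend change above threads MCContext by reference into adjustFixupValue, so range and alignment diagnostics are always emitted instead of being gated on a nullable pointer. A standalone sketch of the rule applied to the 8-byte-scaled 12-bit load/store fixup, using only the bounds shown in the hunk; the helper name and printf-based reporting are illustrative only.

    // Illustrative sketch (not from the patch): the range/alignment check that
    // adjustFixupValue() performs for fixup_aarch64_ldst_imm12_scale8.
    #include <cstdint>
    #include <cstdio>

    static bool encodeLdStImm12Scale8(uint64_t Value, uint64_t &Encoded) {
      if (Value >= 0x8000) {        // must fit 12 bits after dividing by 8
        std::puts("fixup value out of range");
        return false;
      }
      if (Value & 0x7) {            // must be 8-byte aligned
        std::puts("fixup must be 8-byte aligned");
        return false;
      }
      Encoded = Value >> 3;         // low three bits are implied by the scale
      return true;
    }

    int main() {
      uint64_t Enc;
      if (encodeLdStImm12Scale8(0x1F8, Enc))
        std::printf("encoded imm12 = 0x%llx\n", (unsigned long long)Enc);
      return 0;
    }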
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 685907a2178e..271263507ae1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -14,27 +14,23 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
-#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -106,8 +102,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
@@ -180,6 +176,7 @@ private:
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
ElfMappingSymbol LastEMS;
};
+
} // end anonymous namespace
AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
@@ -191,6 +188,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
+
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
@@ -214,4 +212,5 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new AArch64TargetELFStreamer(S);
return nullptr;
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index e9d38d3dcf10..f710065d9bc7 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -84,9 +84,14 @@ static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
// no matter how far away they are.
else if (CM == CodeModel::JITDefault)
CM = CodeModel::Large;
- else if (CM != CodeModel::Small && CM != CodeModel::Large)
- report_fatal_error(
- "Only small and large code models are allowed on AArch64");
+ else if (CM != CodeModel::Small && CM != CodeModel::Large) {
+ if (!TT.isOSFuchsia())
+ report_fatal_error(
+ "Only small and large code models are allowed on AArch64");
+ else if (CM != CodeModel::Kernel)
+ report_fatal_error(
+ "Only small, kernel, and large code models are allowed on AArch64");
+ }
}
static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
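The adjustCodeGenOpts hunk above relaxes the AArch64 code-model check so Fuchsia targets may also use the kernel code model. A small sketch of the resulting rule, with a local CodeModel enum standing in for llvm::CodeModel; the predicate name is hypothetical.

    // Illustrative sketch (not from the patch): Small and Large are accepted
    // everywhere, and Kernel is additionally accepted on Fuchsia.
    #include <cstdio>

    enum class CodeModel { Small, Kernel, Large };

    static bool isAllowedAArch64CodeModel(CodeModel CM, bool IsFuchsia) {
      if (CM == CodeModel::Small || CM == CodeModel::Large)
        return true;
      return IsFuchsia && CM == CodeModel::Kernel;
    }

    int main() {
      std::printf("kernel on fuchsia: %d\n",
                  isAllowedAArch64CodeModel(CodeModel::Kernel, true));
      std::printf("kernel elsewhere:  %d\n",
                  isAllowedAArch64CodeModel(CodeModel::Kernel, false));
      return 0;
    }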
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 53a68527ee8e..3d296ba4806b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -16,14 +16,22 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
namespace {
+
class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
bool getAArch64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType,
const MCSymbolRefExpr *Sym,
@@ -38,7 +46,8 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
};
-}
+
+} // end anonymous namespace
bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
@@ -51,18 +60,18 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return false;
case FK_Data_1:
- Log2Size = llvm::Log2_32(1);
+ Log2Size = Log2_32(1);
return true;
case FK_Data_2:
- Log2Size = llvm::Log2_32(2);
+ Log2Size = Log2_32(2);
return true;
case FK_Data_4:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
case FK_Data_8:
- Log2Size = llvm::Log2_32(8);
+ Log2Size = Log2_32(8);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
@@ -72,7 +81,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
case AArch64::fixup_aarch64_ldst_imm12_scale4:
case AArch64::fixup_aarch64_ldst_imm12_scale8:
case AArch64::fixup_aarch64_ldst_imm12_scale16:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
switch (Sym->getKind()) {
default:
return false;
@@ -87,14 +96,13 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return true;
}
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
- default: {
+ default:
Asm.getContext().reportError(Fixup.getLoc(),
"ADR/ADRP relocations must be GOT relative");
return false;
- }
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
return true;
@@ -108,7 +116,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return true;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26);
return true;
}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index dcc39176031c..5d76681cd97b 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -266,82 +266,86 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
}
} // end namespace AArch64CC
+struct SysAlias {
+ const char *Name;
+ uint16_t Encoding;
+ FeatureBitset FeaturesRequired;
+
+ SysAlias (const char *N, uint16_t E) : Name(N), Encoding(E) {};
+ SysAlias (const char *N, uint16_t E, FeatureBitset F) :
+ Name(N), Encoding(E), FeaturesRequired(F) {};
+
+ bool haveFeatures(FeatureBitset ActiveFeatures) const {
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
+
+ FeatureBitset getRequiredFeatures() const { return FeaturesRequired; }
+};
+
+struct SysAliasReg : SysAlias {
+ bool NeedsReg;
+ SysAliasReg(const char *N, uint16_t E, bool R) : SysAlias(N, E), NeedsReg(R) {};
+};
+
namespace AArch64AT{
- struct AT {
- const char *Name;
- uint16_t Encoding;
+ struct AT : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_AT_DECL
#include "AArch64GenSystemOperands.inc"
-
}
+
namespace AArch64DB {
- struct DB {
- const char *Name;
- uint16_t Encoding;
+ struct DB : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_DB_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64DC {
- struct DC {
- const char *Name;
- uint16_t Encoding;
+ struct DC : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_DC_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64IC {
- struct IC {
- const char *Name;
- uint16_t Encoding;
- bool NeedsReg;
+ struct IC : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
};
#define GET_IC_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64ISB {
- struct ISB {
- const char *Name;
- uint16_t Encoding;
+ struct ISB : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_ISB_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PRFM {
- struct PRFM {
- const char *Name;
- uint16_t Encoding;
+ struct PRFM : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_PRFM_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PState {
- struct PState {
- const char *Name;
- uint16_t Encoding;
- FeatureBitset FeaturesRequired;
-
- bool haveFeatures(FeatureBitset ActiveFeatures) const {
- return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
- }
+ struct PState : SysAlias{
+ using SysAlias::SysAlias;
};
#define GET_PSTATE_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PSBHint {
- struct PSB {
- const char *Name;
- uint16_t Encoding;
+ struct PSB : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_PSB_DECL
#include "AArch64GenSystemOperands.inc"
@@ -451,10 +455,8 @@ namespace AArch64SysReg {
}
namespace AArch64TLBI {
- struct TLBI {
- const char *Name;
- uint16_t Encoding;
- bool NeedsReg;
+ struct TLBI : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
};
#define GET_TLBI_DECL
#include "AArch64GenSystemOperands.inc"
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 7b0a7f4b6058..8f6e1e7d8846 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -11,6 +11,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -23,6 +24,7 @@ class Pass;
class Target;
class TargetMachine;
class PassRegistry;
+class Module;
// R600 Passes
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
@@ -37,6 +39,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
+FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
@@ -45,21 +48,32 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
+FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
-ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
+ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
+ModulePass *createAMDGPULowerIntrinsicsPass(const TargetMachine *TM = nullptr);
+void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
+extern char &AMDGPULowerIntrinsicsID;
+
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
+void initializeSIPeepholeSDWAPass(PassRegistry &);
+extern char &SIPeepholeSDWAID;
+
void initializeSIShrinkInstructionsPass(PassRegistry&);
extern char &SIShrinkInstructionsID;
void initializeSIFixSGPRCopiesPass(PassRegistry &);
extern char &SIFixSGPRCopiesID;
+void initializeSIFixVGPRCopiesPass(PassRegistry &);
+extern char &SIFixVGPRCopiesID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -86,11 +100,11 @@ extern char &AMDGPUPromoteAllocaID;
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
CodeGenOpt::Level OptLevel);
-ModulePass *createAMDGPUAlwaysInlinePass();
+ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
-FunctionPass* createAMDGPUUnifyMetadataPass();
+ModulePass* createAMDGPUUnifyMetadataPass();
void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
extern char &AMDGPUUnifyMetadataID;
@@ -112,6 +126,15 @@ extern char &SIDebuggerInsertNopsID;
void initializeSIInsertWaitsPass(PassRegistry&);
extern char &SIInsertWaitsID;
+void initializeSIInsertWaitcntsPass(PassRegistry&);
+extern char &SIInsertWaitcntsID;
+
+void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
+extern char &AMDGPUUnifyDivergentExitNodesID;
+
+ImmutablePass *createAMDGPUAAWrapperPass();
+void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
+
Target &getTheAMDGPUTarget();
Target &getTheGCNTarget();
@@ -133,43 +156,53 @@ enum TargetIndex {
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
-namespace AMDGPUAS {
-enum AddressSpaces : unsigned {
- PRIVATE_ADDRESS = 0, ///< Address space for private memory.
- GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
- LOCAL_ADDRESS = 3, ///< Address space for local memory.
- FLAT_ADDRESS = 4, ///< Address space for flat memory.
- REGION_ADDRESS = 5, ///< Address space for region memory.
- PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
- PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
+struct AMDGPUAS {
+ // The following address space values depend on the triple environment.
+ unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
+ unsigned FLAT_ADDRESS; ///< Address space for flat memory.
+ unsigned REGION_ADDRESS; ///< Address space for region memory.
+
+ // The maximum value for flat, generic, local, private, constant and region.
+ const static unsigned MAX_COMMON_ADDRESS = 5;
+
+ const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
+ const static unsigned CONSTANT_ADDRESS = 2; ///< Address space for constant memory (VTX2)
+ const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
+ const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressable parameter memory (CONST0)
+ const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressable parameter memory (VTX1)
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
// order to be able to dynamically index a constant buffer, for example:
//
// ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
- CONSTANT_BUFFER_0 = 8,
- CONSTANT_BUFFER_1 = 9,
- CONSTANT_BUFFER_2 = 10,
- CONSTANT_BUFFER_3 = 11,
- CONSTANT_BUFFER_4 = 12,
- CONSTANT_BUFFER_5 = 13,
- CONSTANT_BUFFER_6 = 14,
- CONSTANT_BUFFER_7 = 15,
- CONSTANT_BUFFER_8 = 16,
- CONSTANT_BUFFER_9 = 17,
- CONSTANT_BUFFER_10 = 18,
- CONSTANT_BUFFER_11 = 19,
- CONSTANT_BUFFER_12 = 20,
- CONSTANT_BUFFER_13 = 21,
- CONSTANT_BUFFER_14 = 22,
- CONSTANT_BUFFER_15 = 23,
+ const static unsigned CONSTANT_BUFFER_0 = 8;
+ const static unsigned CONSTANT_BUFFER_1 = 9;
+ const static unsigned CONSTANT_BUFFER_2 = 10;
+ const static unsigned CONSTANT_BUFFER_3 = 11;
+ const static unsigned CONSTANT_BUFFER_4 = 12;
+ const static unsigned CONSTANT_BUFFER_5 = 13;
+ const static unsigned CONSTANT_BUFFER_6 = 14;
+ const static unsigned CONSTANT_BUFFER_7 = 15;
+ const static unsigned CONSTANT_BUFFER_8 = 16;
+ const static unsigned CONSTANT_BUFFER_9 = 17;
+ const static unsigned CONSTANT_BUFFER_10 = 18;
+ const static unsigned CONSTANT_BUFFER_11 = 19;
+ const static unsigned CONSTANT_BUFFER_12 = 20;
+ const static unsigned CONSTANT_BUFFER_13 = 21;
+ const static unsigned CONSTANT_BUFFER_14 = 22;
+ const static unsigned CONSTANT_BUFFER_15 = 23;
// Some places use this if the address space can't be determined.
- UNKNOWN_ADDRESS_SPACE = ~0u
+ const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
};
-} // namespace AMDGPUAS
+namespace llvm {
+namespace AMDGPU {
+AMDGPUAS getAMDGPUAS(const Module &M);
+AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
+AMDGPUAS getAMDGPUAS(Triple T);
+} // namespace AMDGPU
+} // namespace llvm
#endif
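The AMDGPU.h hunk above turns the fixed AddressSpaces enum into a per-triple AMDGPUAS struct: GLOBAL, CONSTANT and LOCAL keep their values, while PRIVATE, FLAT and REGION are filled in by getAMDGPUAS(). A sketch of the two layouts, using the numbering implied by the asserts in AMDGPUAliasAnalysis.cpp further down; the struct here is a stand-in for illustration, not the real type.

    // Illustrative sketch (not from the patch): the two address-space layouts
    // the new per-triple AMDGPUAS struct can describe.
    #include <cstdio>

    struct ExampleAS {
      unsigned PRIVATE_ADDRESS;
      unsigned FLAT_ADDRESS;
      unsigned REGION_ADDRESS;
      // GLOBAL=1, CONSTANT=2, LOCAL=3 stay fixed in the real struct.
    };

    int main() {
      ExampleAS PrivIsZero{/*Private*/ 0, /*Flat*/ 4, /*Region*/ 5};
      ExampleAS FlatIsZero{/*Private*/ 5, /*Flat*/ 0, /*Region*/ 4};
      std::printf("priv-is-zero layout: flat AS = %u\n", PrivIsZero.FLAT_ADDRESS);
      std::printf("flat-is-zero layout: flat AS = %u\n", FlatIsZero.FLAT_ADDRESS);
      return 0;
    }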
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 13022009af16..2c7a2d8962d0 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -67,12 +67,24 @@ def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"Support unaligned global loads and stores"
>;
+def FeatureTrapHandler: SubtargetFeature<"trap-handler",
+ "TrapHandler",
+ "true",
+ "Trap handler support"
+>;
+
def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"UnalignedScratchAccess",
"true",
"Support unaligned scratch loads and stores"
>;
+def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
+ "HasApertureRegs",
+ "true",
+ "Has Memory Aperture Base and Size Registers"
+>;
+
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
@@ -154,6 +166,12 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
"Additional intstructions for CI+"
>;
+def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
+ "GFX9Insts",
+ "true",
+ "Additional intstructions for GFX9+"
+>;
+
def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
"HasSMemRealTime",
"true",
@@ -172,6 +190,12 @@ def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
"Has i16/f16 instructions"
>;
+def FeatureVOP3P : SubtargetFeature<"vop3p",
+ "HasVOP3PInsts",
+ "true",
+ "Has VOP3P packed instructions"
+>;
+
def FeatureMovrel : SubtargetFeature<"movrel",
"HasMovrel",
"true",
@@ -190,16 +214,22 @@ def FeatureScalarStores : SubtargetFeature<"scalar-stores",
"Has store scalar memory instructions"
>;
-//===------------------------------------------------------------===//
-// Subtarget Features (options and debugging)
-//===------------------------------------------------------------===//
+def FeatureSDWA : SubtargetFeature<"sdwa",
+ "HasSDWA",
+ "true",
+ "Support SDWA (Sub-DWORD Addressing) extension"
+>;
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
- "FP16Denormals",
+def FeatureDPP : SubtargetFeature<"dpp",
+ "HasDPP",
"true",
- "Enable half precision denormal handling"
+ "Support DPP (Data Parallel Primitives) extension"
>;
+//===------------------------------------------------------------===//
+// Subtarget Features (options and debugging)
+//===------------------------------------------------------------===//
+
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -209,13 +239,36 @@ def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
"Enable single precision denormal handling"
>;
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64Denormals",
+// Denormal handling for fp64 and fp16 is controlled by the same
+// config register when fp16 supported.
+// TODO: Do we need a separate f16 setting when not legal?
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
+ "FP64FP16Denormals",
"true",
- "Enable double precision denormal handling",
+ "Enable double and half precision denormal handling",
[FeatureFP64]
>;
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable double and half precision denormal handling",
+ [FeatureFP64, FeatureFP64FP16Denormals]
+>;
+
+def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable half precision denormal handling",
+ [FeatureFP64FP16Denormals]
+>;
+
+def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
+ "DX10Clamp",
+ "true",
+ "clamp modifier clamps NaNs to 0.0"
+>;
+
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
@@ -343,7 +396,17 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
- FeatureScalarStores, FeatureInv2PiInlineImm
+ FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
+ FeatureDPP
+ ]
+>;
+
+def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
+ [FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
+ FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode
]
>;
@@ -399,6 +462,9 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
FeatureLDSBankCount16,
FeatureXNACK]>;
+def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
+def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
+
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
//===----------------------------------------------------------------------===//
@@ -504,14 +570,27 @@ def isVI : Predicate <
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
AssemblerPredicate<"FeatureGCN3Encoding">;
+def isGFX9 : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
+ AssemblerPredicate<"FeatureGFX9Insts">;
+
+// TODO: Either the name should be changed or we should simply use IsCI!
def isCIVI : Predicate <
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
- "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->, AssemblerPredicate<"FeatureCIInsts">;
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"FeatureCIInsts">;
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
-def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
+def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
+ AssemblerPredicate<"Feature16BitInsts">;
+def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
+ AssemblerPredicate<"FeatureVOP3P">;
+
+def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
+ AssemblerPredicate<"FeatureSDWA">;
+
+def HasDPP : Predicate<"Subtarget->hasDPP()">,
+ AssemblerPredicate<"FeatureDPP">;
class PredicateControl {
Predicate SubtargetPredicate;
@@ -532,5 +611,6 @@ include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
+include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
include "AMDGPUCallingConv.td"
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
new file mode 100644
index 000000000000..3c99f48e818a
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -0,0 +1,147 @@
+//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the AMDGPU address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUAliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-aa"
+
+// Register this pass...
+char AMDGPUAAWrapperPass::ID = 0;
+INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
+ "AMDGPU Address space based Alias Analysis", false, true)
+
+ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
+ return new AMDGPUAAWrapperPass();
+}
+
+void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+// Must match the table in getAliasResult.
+AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
+ : Arch(Arch_), AS(AS_) {
+ // These arrays are indexed by address space value,
+ // i.e. enum elements 0 through 5.
+ static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
+ /* Private Global Constant Group Flat Region*/
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
+ /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
+ };
+ static const AliasResult ASAliasRulesGenIsZero[6][6] = {
+ /* Flat Global Constant Group Region Private */
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
+ /* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias, NoAlias},
+ /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
+ /* Region */ {MayAlias, NoAlias , NoAlias , NoAlias, MayAlias, NoAlias},
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
+ };
+ assert(AS.MAX_COMMON_ADDRESS <= 5);
+ if (AS.FLAT_ADDRESS == 0) {
+ assert(AS.GLOBAL_ADDRESS == 1 &&
+ AS.REGION_ADDRESS == 4 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.CONSTANT_ADDRESS == 2 &&
+ AS.PRIVATE_ADDRESS == 5);
+ ASAliasRules = &ASAliasRulesGenIsZero;
+ } else {
+ assert(AS.PRIVATE_ADDRESS == 0 &&
+ AS.GLOBAL_ADDRESS == 1 &&
+ AS.CONSTANT_ADDRESS == 2 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.FLAT_ADDRESS == 4 &&
+ AS.REGION_ADDRESS == 5);
+ ASAliasRules = &ASAliasRulesPrivIsZero;
+ }
+}
+
+AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
+ unsigned AS2) const {
+ if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) {
+ if (Arch == Triple::amdgcn)
+ report_fatal_error("Pointer address space out of range");
+ return AS1 == AS2 ? MayAlias : NoAlias;
+ }
+
+ return (*ASAliasRules)[AS1][AS2];
+}
+
+AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
+ unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
+
+ AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
+ if (Result == NoAlias) return Result;
+
+ // Forward the query to the next alias analysis.
+ return AAResultBase::alias(LocA, LocB);
+}
+
+bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
+
+ if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
+ return true;
+ }
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ if (GV->isConstant())
+ return true;
+ } else if (const Argument *Arg = dyn_cast<Argument>(Base)) {
+ const Function *F = Arg->getParent();
+
+ // Only assume constant memory for arguments on kernels.
+ switch (F->getCallingConv()) {
+ default:
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ break;
+ }
+
+ unsigned ArgNo = Arg->getArgNo();
+ /* On an argument, ReadOnly attribute indicates that the function does
+ not write through this pointer argument, even though it may write
+ to the memory that the pointer points to.
+ On an argument, ReadNone attribute indicates that the function does
+ not dereference that pointer argument, even though it may read or write
+ the memory that the pointer points to if accessed through other pointers.
+ */
+ if (F->hasParamAttribute(ArgNo, Attribute::NoAlias) &&
+ (F->hasParamAttribute(ArgNo, Attribute::ReadNone) ||
+ F->hasParamAttribute(ArgNo, Attribute::ReadOnly))) {
+ return true;
+ }
+ }
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+}
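The new AMDGPUAliasAnalysis pass answers alias queries from a 6x6 table indexed by the two address spaces, with a fallback for values above MAX_COMMON_ADDRESS (the r600 constant buffers). A sketch of that lookup with a toy table; the table contents below are deliberately simplified and are not the rules used by the pass.

    // Illustrative sketch (not from the patch): table-driven alias queries with
    // the out-of-range fallback used for r600 (amdgcn reports a fatal error).
    #include <cstdio>

    enum Alias { NoAlias, MayAlias };
    static const unsigned MaxCommonAS = 5;

    // Toy table, filled in main(): each space may alias itself, and flat
    // (index 4 in the priv-is-zero layout) may alias everything.
    static Alias Rules[6][6];

    static Alias getAliasResult(unsigned AS1, unsigned AS2) {
      if (AS1 > MaxCommonAS || AS2 > MaxCommonAS)
        return AS1 == AS2 ? MayAlias : NoAlias;
      return Rules[AS1][AS2];
    }

    int main() {
      for (unsigned I = 0; I <= MaxCommonAS; ++I) {
        Rules[I][I] = MayAlias;
        Rules[4][I] = Rules[I][4] = MayAlias;
      }
      std::printf("private vs global: %s\n",
                  getAliasResult(0, 1) == NoAlias ? "NoAlias" : "MayAlias");
      std::printf("flat vs local:     %s\n",
                  getAliasResult(4, 3) == NoAlias ? "NoAlias" : "MayAlias");
      return 0;
    }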
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
new file mode 100644
index 000000000000..5f8ed9b1f9a3
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -0,0 +1,102 @@
+//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the AMDGPU address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
+#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
+
+#include "AMDGPU.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A simple AA result that uses AMDGPU address space info to answer queries.
+class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
+ friend AAResultBase<AMDGPUAAResult>;
+
+ const DataLayout &DL;
+ AMDGPUAS AS;
+
+public:
+ explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
+ DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
+ AMDGPUAAResult(AMDGPUAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
+ ASAliasRules(Arg.ASAliasRules){}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+
+private:
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+ bool PathAliases(const MDNode *A, const MDNode *B) const;
+
+ class ASAliasRulesTy {
+ public:
+ ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
+ AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
+ private:
+ Triple::ArchType Arch;
+ AMDGPUAS AS;
+ const AliasResult (*ASAliasRules)[6][6];
+ } ASAliasRules;
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
+ friend AnalysisInfoMixin<AMDGPUAA>;
+ static char PassID;
+
+public:
+ typedef AMDGPUAAResult Result;
+
+ AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
+ return AMDGPUAAResult(F.getParent()->getDataLayout(),
+ Triple(F.getParent()->getTargetTriple()));
+ }
+};
+
+/// Legacy wrapper pass to provide the AMDGPUAAResult object.
+class AMDGPUAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<AMDGPUAAResult> Result;
+
+public:
+ static char ID;
+
+ AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+ initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ AMDGPUAAResult &getResult() { return *Result; }
+ const AMDGPUAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override {
+ Result.reset(new AMDGPUAAResult(M.getDataLayout(),
+ Triple(M.getTargetTriple())));
+ return false;
+ }
+ bool doFinalization(Module &M) override {
+ Result.reset();
+ return false;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+}
+#endif // LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 067a16a2af7f..1d03714874e2 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -24,8 +24,10 @@ namespace {
class AMDGPUAlwaysInline : public ModulePass {
static char ID;
+ bool GlobalOpt;
+
public:
- AMDGPUAlwaysInline() : ModulePass(ID) { }
+ AMDGPUAlwaysInline(bool GlobalOpt) : ModulePass(ID), GlobalOpt(GlobalOpt) { }
bool runOnModule(Module &M) override;
StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
};
@@ -45,8 +47,10 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
}
}
- for (GlobalAlias* A : AliasesToRemove) {
- A->eraseFromParent();
+ if (GlobalOpt) {
+ for (GlobalAlias* A : AliasesToRemove) {
+ A->eraseFromParent();
+ }
}
for (Function &F : M) {
@@ -70,6 +74,6 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
return false;
}
-ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
- return new AMDGPUAlwaysInline();
+ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
+ return new AMDGPUAlwaysInline(GlobalOpt);
}
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index c98d25e20185..3d8db7cd8af5 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
@@ -26,7 +27,9 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
- static bool hasAddrSpaceCast(const Function &F);
+ const TargetMachine *TM;
+ AMDGPUAS AS;
+ static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
void addAttrToCallers(Function *Intrin, StringRef AttrName);
bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
@@ -34,7 +37,8 @@ private:
public:
static char ID;
- AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
+ AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
+ ModulePass(ID), TM(TM_) {}
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
return "AMDGPU Annotate Kernel Features";
@@ -45,10 +49,11 @@ public:
ModulePass::getAnalysisUsage(AU);
}
- static bool visitConstantExpr(const ConstantExpr *CE);
+ static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+ AMDGPUAS AS);
};
}
@@ -62,18 +67,20 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS) {
- return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
+ return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace());
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
+ const AMDGPUAS &AS) {
+ return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
+ AMDGPUAS AS) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS);
+ return castRequiresQueuePtr(SrcAS, AS);
}
return false;
@@ -81,7 +88,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+ AMDGPUAS AS) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
@@ -94,7 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (visitConstantExpr(CE))
+ if (visitConstantExpr(CE, AS))
return true;
}
@@ -115,13 +123,14 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
}
// Return true if an addrspacecast is used that requires the queue ptr.
-bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
+bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
+ AMDGPUAS AS) {
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
for (const BasicBlock &BB : F) {
for (const Instruction &I : BB) {
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC))
+ if (castRequiresQueuePtr(ASC, AS))
return true;
}
@@ -130,7 +139,7 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
return true;
}
}
@@ -170,6 +179,7 @@ bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
Triple TT(M.getTargetTriple());
+ AS = AMDGPU::getAMDGPUAS(M);
static const StringRef IntrinsicToAttr[][2] = {
// .x omitted
@@ -190,7 +200,9 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
static const StringRef HSAIntrinsicToAttr[][2] = {
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
{ "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
- { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }
+ { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
+ { "llvm.trap", "amdgpu-queue-ptr" },
+ { "llvm.debugtrap", "amdgpu-queue-ptr" }
};
// TODO: We should not add the attributes if the known compile time workgroup
@@ -209,7 +221,9 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
if (F.hasFnAttribute("amdgpu-queue-ptr"))
continue;
- if (hasAddrSpaceCast(F))
+ bool HasApertureRegs =
+ TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+ if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
F.addFnAttr("amdgpu-queue-ptr");
}
}
@@ -217,6 +231,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
return Changed;
}
-ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
- return new AMDGPUAnnotateKernelFeatures();
+ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
+ return new AMDGPUAnnotateKernelFeatures(TM);
}
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index c011be6fa169..91b3649f5c39 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -37,6 +37,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isKernelFunc;
+ AMDGPUAS AMDGPUASI;
public:
static char ID;
@@ -130,8 +131,8 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
- auto isGlobalLoad = [](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ auto isGlobalLoad = [&](LoadInst &Load)->bool {
+ return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
};
// We're tracking up to the Function boundaries
// We cannot go beyond because of FunctionPass restrictions
@@ -166,6 +167,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(M);
return false;
}
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 974e79fff3d7..0446655830d1 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,11 +17,11 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
@@ -93,33 +93,40 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
+ : AsmPrinter(TM, std::move(Streamer)) {
+ AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
+ }
StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
}
+const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {
+ return TM.getMCSubtargetInfo();
+}
+
+AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const {
+ return static_cast<AMDGPUTargetStreamer&>(*OutStreamer->getTargetStreamer());
+}
+
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
- // Need to construct an MCSubtargetInfo here in case we have no functions
- // in the module.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple().str(), TM.getTargetCPU(),
- TM.getTargetFeatureString()));
-
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
- TS->EmitDirectiveHSACodeObjectVersion(2, 1);
+ getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1);
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(
+ ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+ getTargetStreamer().EmitStartOfCodeObjectMetadata(M);
+}
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
- TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
- "AMD", "AMDGPU");
+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
- // Emit runtime metadata.
- TS->EmitRuntimeMetadata(M);
+ getTargetStreamer().EmitEndOfCodeObjectMetadata();
}
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -136,25 +143,32 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
}
-
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
+ amd_kernel_code_t KernelCode;
if (STM.isAmdCodeObjectV2(*MF)) {
getSIProgramInfo(KernelInfo, *MF);
- EmitAmdKernelCodeT(*MF, KernelInfo);
+ getAmdKernelCode(KernelCode, KernelInfo, *MF);
+
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
}
+
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
+ getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(),
+ KernelCode);
}
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) {
SmallString<128> SymbolName;
getNameWithPrefix(SymbolName, MF->getFunction()),
- TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
+ getTargetStreamer().EmitAMDGPUSymbolType(
+ SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
}
AsmPrinter::EmitFunctionEntryLabel();
@@ -163,7 +177,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Group segment variables aren't emitted in HSA.
- if (AMDGPU::isGroupSegment(GV))
+ if (AMDGPU::isGroupSegment(GV, AMDGPUASI))
return;
AsmPrinter::EmitGlobalVariable(GV);
@@ -247,6 +261,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+ Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
+ false);
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
false);
@@ -382,6 +399,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
case AMDGPU::EXEC_HI:
case AMDGPU::SCC:
case AMDGPU::M0:
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
continue;
case AMDGPU::VCC:
@@ -478,33 +499,20 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ExtraSGPRs = 6;
}
- // Record first reserved register and reserved register count fields, and
- // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
- // requested.
- ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
- ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);
-
- // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
- // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
- // attribute was requested.
- if (STM.debuggerEmitPrologue()) {
- ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
- RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
- ProgInfo.DebuggerPrivateSegmentBufferSGPR =
- RI->getHWRegIndex(MFI->getScratchRSrcReg());
- }
+ unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
- unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
+ unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"addressable scalar registers",
MaxSGPR + 1, DS_Error,
- DK_ResourceLimit, MaxAddressableNumSGPRs);
+ DK_ResourceLimit,
+ MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
MaxSGPR = MaxAddressableNumSGPRs - 1;
}
@@ -512,41 +520,43 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// Account for extra SGPRs and VGPRs reserved for debugger use.
MaxSGPR += ExtraSGPRs;
- MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
+ MaxVGPR += ExtraVGPRs;
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
- ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
+ ProgInfo.NumVGPR = MaxVGPR + 1;
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
- ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
+ ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPRsForWavesPerEU = std::max(
- ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
+ ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
STM.hasSGPRInitBug()) {
- unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
- if (ProgInfo.NumSGPR > MaxNumSGPRs) {
- // This can happen due to a compiler bug or when using inline asm to use the
- // registers which are usually reserved for vcc etc.
-
+ unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
+ if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+ // This can happen due to a compiler bug or when using inline asm to use
+ // the registers which are usually reserved for vcc etc.
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"scalar registers",
ProgInfo.NumSGPR, DS_Error,
- DK_ResourceLimit, MaxNumSGPRs);
+ DK_ResourceLimit,
+ MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
- ProgInfo.NumSGPR = MaxNumSGPRs;
- ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
+ ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
+ ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
}
}
if (STM.hasSGPRInitBug()) {
- ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
- ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ ProgInfo.NumSGPR =
+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+ ProgInfo.NumSGPRsForWavesPerEU =
+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
}
if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
@@ -565,13 +575,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// SGPRBlocks is actual number of SGPR blocks minus 1.
ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
- RI->getSGPRAllocGranule());
- ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
+ STM.getSGPREncodingGranule());
+ ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1;
// VGPRBlocks is actual number of VGPR blocks minus 1.
ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
- RI->getVGPRAllocGranule());
- ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
+ STM.getVGPREncodingGranule());
+ ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
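A short worked example of this block encoding (the granule values below are illustrative; the real ones come from STM.getSGPREncodingGranule() and STM.getVGPREncodingGranule()):

  // Sketch only: encode a register count in the "blocks minus one" form.
  unsigned encodeBlocks(unsigned NumRegs, unsigned Granule) {
    return llvm::alignTo(NumRegs, Granule) / Granule - 1;
  }
  // encodeBlocks(42, 8) == 5   (42 SGPRs round up to 48 = 6 blocks, encoded as 5)
  // encodeBlocks(35, 4) == 8   (35 VGPRs round up to 36 = 9 blocks, encoded as 8)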
+
+ // Record first reserved VGPR and number of reserved VGPRs.
+ ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
+ ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
+
+ // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
+ // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
+ // attribute was requested.
+ if (STM.debuggerEmitPrologue()) {
+ ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
+ RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
+ ProgInfo.DebuggerPrivateSegmentBufferSGPR =
+ RI->getHWRegIndex(MFI->getScratchRSrcReg());
+ }
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
@@ -580,7 +604,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
// Make the clamp modifier on NaN input return 0.
- ProgInfo.DX10Clamp = 1;
+ ProgInfo.DX10Clamp = STM.enableDX10Clamp();
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
ProgInfo.ScratchSize = FrameInfo.getStackSize();
@@ -635,6 +659,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
+ S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |
S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
@@ -688,7 +713,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
}
@@ -713,97 +738,88 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
-void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const {
+void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
+ const SIProgramInfo &KernelInfo,
+ const MachineFunction &MF) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
- amd_kernel_code_t header;
- AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
- header.compute_pgm_resource_registers =
+ Out.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
- header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
-
+ Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
- AMD_HSA_BITS_SET(header.code_properties,
+ AMD_HSA_BITS_SET(Out.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize()));
if (MFI->hasPrivateSegmentBuffer()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (MFI->hasDispatchPtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (MFI->hasQueuePtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (MFI->hasKernargSegmentPtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (MFI->hasDispatchID())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (MFI->hasFlatScratchInit())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
-
- // TODO: Private segment size
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (MFI->hasGridWorkgroupCountX()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
}
if (MFI->hasGridWorkgroupCountY()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
}
if (MFI->hasGridWorkgroupCountZ()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
}
if (MFI->hasDispatchPtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.debuggerSupported())
- header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
if (STM.isXNACKEnabled())
- header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
// FIXME: Should use getKernArgSize
- header.kernarg_segment_byte_size =
+ Out.kernarg_segment_byte_size =
STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
- header.wavefront_sgpr_count = KernelInfo.NumSGPR;
- header.workitem_vgpr_count = KernelInfo.NumVGPR;
- header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
- header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
- header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
- header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
+ Out.wavefront_sgpr_count = KernelInfo.NumSGPR;
+ Out.workitem_vgpr_count = KernelInfo.NumVGPR;
+ Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+ Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+ Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
+ Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
- header.kernarg_segment_alignment = std::max((size_t)4,
+ Out.kernarg_segment_alignment = std::max((size_t)4,
countTrailingZeros(MFI->getMaxKernArgAlign()));
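For clarity, the stored value is log2 of the alignment, clamped to a minimum of 4 (16 bytes); a couple of illustrative cases:

  // MaxKernArgAlign = 8  -> countTrailingZeros(8)  = 3 -> max(4, 3) = 4 -> 2^4 = 16
  // MaxKernArgAlign = 64 -> countTrailingZeros(64) = 6 -> max(4, 6) = 6 -> 2^6 = 64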
if (STM.debuggerEmitPrologue()) {
- header.debug_wavefront_private_segment_offset_sgpr =
+ Out.debug_wavefront_private_segment_offset_sgpr =
KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
- header.debug_private_segment_buffer_sgpr =
+ Out.debug_private_segment_buffer_sgpr =
KernelInfo.DebuggerPrivateSegmentBufferSGPR;
}
-
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
-
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
- TS->EmitAMDKernelCodeT(header);
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 9a4bafef3a25..13425c8b2a0f 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,95 +15,84 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
-#include "AMDGPUMCInstLower.h"
-
+#include "AMDKernelCodeT.h"
+#include "AMDGPU.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
#include <vector>
namespace llvm {
+
+class AMDGPUTargetStreamer;
class MCOperand;
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
struct SIProgramInfo {
- SIProgramInfo() :
- VGPRBlocks(0),
- SGPRBlocks(0),
- Priority(0),
- FloatMode(0),
- Priv(0),
- DX10Clamp(0),
- DebugMode(0),
- IEEEMode(0),
- ScratchSize(0),
- ComputePGMRSrc1(0),
- LDSBlocks(0),
- ScratchBlocks(0),
- ComputePGMRSrc2(0),
- NumVGPR(0),
- NumSGPR(0),
- FlatUsed(false),
- NumSGPRsForWavesPerEU(0),
- NumVGPRsForWavesPerEU(0),
- ReservedVGPRFirst(0),
- ReservedVGPRCount(0),
- DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1),
- DebuggerPrivateSegmentBufferSGPR((uint16_t)-1),
- VCCUsed(false),
- CodeLen(0) {}
-
// Fields set in PGM_RSRC1 pm4 packet.
- uint32_t VGPRBlocks;
- uint32_t SGPRBlocks;
- uint32_t Priority;
- uint32_t FloatMode;
- uint32_t Priv;
- uint32_t DX10Clamp;
- uint32_t DebugMode;
- uint32_t IEEEMode;
- uint32_t ScratchSize;
-
- uint64_t ComputePGMRSrc1;
+ uint32_t VGPRBlocks = 0;
+ uint32_t SGPRBlocks = 0;
+ uint32_t Priority = 0;
+ uint32_t FloatMode = 0;
+ uint32_t Priv = 0;
+ uint32_t DX10Clamp = 0;
+ uint32_t DebugMode = 0;
+ uint32_t IEEEMode = 0;
+ uint32_t ScratchSize = 0;
+
+ uint64_t ComputePGMRSrc1 = 0;
// Fields set in PGM_RSRC2 pm4 packet.
- uint32_t LDSBlocks;
- uint32_t ScratchBlocks;
+ uint32_t LDSBlocks = 0;
+ uint32_t ScratchBlocks = 0;
- uint64_t ComputePGMRSrc2;
+ uint64_t ComputePGMRSrc2 = 0;
- uint32_t NumVGPR;
- uint32_t NumSGPR;
+ uint32_t NumVGPR = 0;
+ uint32_t NumSGPR = 0;
uint32_t LDSSize;
- bool FlatUsed;
+ bool FlatUsed = false;
// Number of SGPRs that meets number of waves per execution unit request.
- uint32_t NumSGPRsForWavesPerEU;
+ uint32_t NumSGPRsForWavesPerEU = 0;
// Number of VGPRs that meets number of waves per execution unit request.
- uint32_t NumVGPRsForWavesPerEU;
+ uint32_t NumVGPRsForWavesPerEU = 0;
// If ReservedVGPRCount is 0 then this must be 0. Otherwise, this is the first
// fixed VGPR number reserved.
- uint16_t ReservedVGPRFirst;
+ uint16_t ReservedVGPRFirst = 0;
// The number of consecutive VGPRs reserved.
- uint16_t ReservedVGPRCount;
+ uint16_t ReservedVGPRCount = 0;
// Fixed SGPR number used to hold wave scratch offset for entire kernel
- // execution, or uint16_t(-1) if the register is not used or not known.
- uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR;
+ // execution, or std::numeric_limits<uint16_t>::max() if the register is not
+ // used or not known.
+ uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
+ std::numeric_limits<uint16_t>::max();
// Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
- // kernel execution, or uint16_t(-1) if the register is not used or not
- // known.
- uint16_t DebuggerPrivateSegmentBufferSGPR;
+ // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
+ // is not used or not known.
+ uint16_t DebuggerPrivateSegmentBufferSGPR =
+ std::numeric_limits<uint16_t>::max();
// Bonus information for debugging.
- bool VCCUsed;
- uint64_t CodeLen;
+ bool VCCUsed = false;
+ uint64_t CodeLen = 0;
+
+ SIProgramInfo() = default;
};
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
+ void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
+ const MachineFunction &MF) const;
void findNumUsedRegistersSI(const MachineFunction &MF,
unsigned &NumSGPR,
unsigned &NumVGPR) const;
@@ -112,21 +101,28 @@ private:
/// can correctly setup the GPU state.
void EmitProgramInfoR600(const MachineFunction &MF);
void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
- void EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const;
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
StringRef getPassName() const override;
+ const MCSubtargetInfo* getSTI() const;
+
+ AMDGPUTargetStreamer& getTargetStreamer() const;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
/// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated
/// pseudo lowering.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+ /// \brief Lower the specified LLVM Constant to an MCExpr.
+ /// The default AsmPrinter::lowerConstant does not know how to lower an
+ /// addrspacecast, so those constants are lowered by this function instead.
+ const MCExpr *lowerConstant(const Constant *CV) override;
+
/// \brief tblgen'erated driver function for lowering simple MI->MC pseudo
/// instructions.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
@@ -143,6 +139,8 @@ public:
void EmitStartOfAsmFile(Module &M) override;
+ void EmitEndOfAsmFile(Module &M) override;
+
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const override;
@@ -153,8 +151,9 @@ public:
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
+ AMDGPUAS AMDGPUASI;
};
-} // End anonymous llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index d53cc153dc9a..e67ae092fdda 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -14,8 +14,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUCallLowering.h"
+#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
-
+#include "AMDGPUSubtarget.h"
+#include "SIISelLowering.h"
+#include "SIRegisterInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,17 +31,138 @@ using namespace llvm;
#endif
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
- : CallLowering(&TLI) {
+ : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val, unsigned VReg) const {
+ MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
return true;
}
+unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
+ Type *ParamTy,
+ unsigned Offset) const {
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = *MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ LLT PtrType = getLLTForType(*PtrTy, DL);
+ unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
+ unsigned KernArgSegmentPtr =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
+
+ unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+
+ MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
+
+ return DstReg;
+}
+
+void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
+ Type *ParamTy, unsigned Offset,
+ unsigned DstReg) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = *MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
+ unsigned Align = DL.getABITypeAlignment(ParamTy);
+ unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
+ MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MOInvariant,
+ TypeSize, Align);
+
+ MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
+}
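As a rough usage sketch of the two helpers above (the argument type, offset, and surrounding variables are hypothetical, not part of the patch): lowerParameterPtr() builds a G_GEP off the KERNARG_SEGMENT_PTR live-in, then lowerParameter() issues the load from the constant address space.

  // Load an i32 kernel argument found 36 bytes into the kernarg segment.
  unsigned Dst = MRI.createGenericVirtualRegister(LLT::scalar(32));
  lowerParameter(MIRBuilder, Type::getInt32Ty(F.getContext()), /*Offset=*/36, Dst);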
+
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
- // TODO: Implement once there are generic loads/stores.
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+ // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
+ if (Info->hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ CCInfo.AllocateReg(PrivateSegmentBufferReg);
+ }
+
+ if (Info->hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(DispatchPtrReg);
+ }
+
+ if (Info->hasQueuePtr()) {
+ unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(QueuePtrReg);
+ }
+
+ if (Info->hasKernargSegmentPtr()) {
+ unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+ const LLT P2 = LLT::pointer(2, 64);
+ unsigned VReg = MRI.createGenericVirtualRegister(P2);
+ MRI.addLiveIn(InputPtrReg, VReg);
+ MIRBuilder.getMBB().addLiveIn(InputPtrReg);
+ MIRBuilder.buildCopy(VReg, InputPtrReg);
+ CCInfo.AllocateReg(InputPtrReg);
+ }
+
+ if (Info->hasDispatchID()) {
+ unsigned DispatchIDReg = Info->addDispatchID(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(DispatchIDReg);
+ }
+
+ if (Info->hasFlatScratchInit()) {
+ unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(FlatScratchInitReg);
+ }
+
+ unsigned NumArgs = F.arg_size();
+ Function::const_arg_iterator CurOrigArg = F.arg_begin();
+ const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+ for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
+ MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT();
+ ISD::ArgFlagsTy Flags;
+ Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
+ CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
+ /*IsVarArg=*/false);
+ bool Res =
+ AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo);
+ assert(!Res && "Call operand has unhandled type");
+ (void)Res;
+ }
+
+ Function::const_arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
+ // FIXME: We should be getting DebugInfo from the arguments somehow.
+ CCValAssign &VA = ArgLocs[i];
+ lowerParameter(MIRBuilder, Arg->getType(),
+ VA.getLocMemOffset() +
+ Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
+ }
+
return true;
}
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 9ae87c9397ab..09bdf8ffcde7 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
+#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
namespace llvm {
@@ -22,6 +23,14 @@ namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
+ AMDGPUAS AMDGPUASI;
+
+ unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+ unsigned Offset) const;
+
+ void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+ unsigned Offset, unsigned DstReg) const;
+
public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
@@ -29,6 +38,7 @@ class AMDGPUCallLowering: public CallLowering {
unsigned VReg) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
};
} // End of namespace llvm;
#endif
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 47dfa4992068..d308f718aae1 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -17,7 +17,7 @@ class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
// Calling convention for SI
def CC_SI : CallingConv<[
- CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -25,17 +25,13 @@ def CC_SI : CallingConv<[
SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
]>>>,
- CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
- SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
- SGPR32, SGPR34, SGPR36, SGPR38 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
- SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
- SGPR33, SGPR35, SGPR37, SGPR39 ]
- >>>,
+ // We have no way of referring to the generated register tuples
+ // here, so use a custom function.
+ CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
+ CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
- CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -53,17 +49,7 @@ def CC_SI : CallingConv<[
VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
- ]>>>,
-
- CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
- SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
- SGPR32, SGPR34, SGPR36, SGPR38 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
- SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
- SGPR33, SGPR35, SGPR37, SGPR39 ]
- >>>
-
+ ]>>>
]>;
def RetCC_SI : CallingConv<[
@@ -76,7 +62,7 @@ def RetCC_SI : CallingConv<[
]>>,
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
- CCIfType<[f32] , CCAssignToReg<[
+ CCIfType<[f32, f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e6230547a9b3..e19314fe0a6c 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -14,16 +14,31 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <iterator>
#define DEBUG_TYPE "amdgpu-codegenprepare"
@@ -34,17 +49,15 @@ namespace {
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNTargetMachine *TM;
- const SISubtarget *ST;
- DivergenceAnalysis *DA;
- Module *Mod;
- bool HasUnsafeFPMath;
+ const SISubtarget *ST = nullptr;
+ DivergenceAnalysis *DA = nullptr;
+ Module *Mod = nullptr;
+ bool HasUnsafeFPMath = false;
/// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
///
/// \returns Binary operation \p V.
- Value *copyFlags(const BinaryOperator &I, Value *V) const;
-
/// \returns \p T's base element bit width.
unsigned getBaseElementBitWidth(const Type *T) const;
@@ -113,13 +126,9 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public:
static char ID;
+
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
- FunctionPass(ID),
- TM(static_cast<const GCNTargetMachine *>(TM)),
- ST(nullptr),
- DA(nullptr),
- Mod(nullptr),
- HasUnsafeFPMath(false) { }
+ FunctionPass(ID), TM(static_cast<const GCNTargetMachine *>(TM)) {}
bool visitFDiv(BinaryOperator &I);
@@ -142,22 +151,7 @@ public:
}
};
-} // End anonymous namespace
-
-Value *AMDGPUCodeGenPrepare::copyFlags(
- const BinaryOperator &I, Value *V) const {
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
- if (!BinOp) // Possibly constant expression.
- return V;
-
- if (isa<OverflowingBinaryOperator>(BinOp)) {
- BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
- BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
- } else if (isa<PossiblyExactOperator>(BinOp))
- BinOp->setIsExact(I.isExact());
-
- return V;
-}
+} // end anonymous namespace
unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
assert(needsPromotionToI32(T) && "T does not need promotion to i32");
@@ -186,12 +180,48 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
}
bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
- if (T->isIntegerTy() && T->getIntegerBitWidth() > 1 &&
- T->getIntegerBitWidth() <= 16)
+ const IntegerType *IntTy = dyn_cast<IntegerType>(T);
+ if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
+ return true;
+
+ if (const VectorType *VT = dyn_cast<VectorType>(T)) {
+ // TODO: The set of packed operations is more limited, so may want to
+ // promote some anyway.
+ if (ST->hasVOP3PInsts())
+ return false;
+
+ return needsPromotionToI32(VT->getElementType());
+ }
+
+ return false;
+}
+
+// Return true if the op promoted to i32 should have nsw set.
+static bool promotedOpIsNSW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ case Instruction::Mul:
+ return I.hasNoUnsignedWrap();
+ default:
+ return false;
+ }
+}
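The rationale, roughly (an editorial note, not part of the patch): once the narrow operands are sign/zero-extended to i32 they occupy at most 16 bits, so:

  // add/sub/shl : the i32 result stays well inside the signed 32-bit range,
  //               so nsw is always safe to add.
  // mul         : 0xFFFF * 0xFFFF exceeds 2^31 - 1, so nsw is only safe when
  //               the original op had nuw (the product then fits in 16 bits).

The promotedOpIsNUW predicate below applies the same argument to unsigned wrap.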
+
+// Return true if the op promoted to i32 should have nuw set.
+static bool promotedOpIsNUW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Mul:
return true;
- if (!T->isVectorTy())
+ case Instruction::Sub:
+ return I.hasNoUnsignedWrap();
+ default:
return false;
- return needsPromotionToI32(cast<VectorType>(T)->getElementType());
+ }
}
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -218,7 +248,19 @@ bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
}
- ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+
+ ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
+ if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
+ if (promotedOpIsNSW(cast<Instruction>(I)))
+ Inst->setHasNoSignedWrap();
+
+ if (promotedOpIsNUW(cast<Instruction>(I)))
+ Inst->setHasNoUnsignedWrap();
+
+ if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+ Inst->setIsExact(ExactOp->isExact());
+ }
+
TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
I.replaceAllUsesWith(TruncRes);
@@ -346,9 +388,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Builder.setFastMathFlags(FMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
- const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
- Function *Decl
- = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
+ Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index 805fb7102a35..e32ca9653b3a 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -12,11 +12,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUFrameLowering.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
@@ -69,34 +64,3 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
// T1.W = stack[1].w
return 1;
}
-
-/// \returns The number of registers allocated for \p FI.
-int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const AMDGPURegisterInfo *RI
- = MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
-
- // Fill in FrameReg output argument.
- FrameReg = RI->getFrameRegister(MF);
-
- // Start the offset at 2 so we don't overwrite work group information.
- // XXX: We should only do this when the shader actually uses this
- // information.
- unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
- int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
-
- for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
- OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
- OffsetBytes += MFI.getObjectSize(i);
- // Each register holds 4 bytes, so we must always align the offset to at
- // least 4 bytes, so that 2 frame objects won't share the same register.
- OffsetBytes = alignTo(OffsetBytes, 4);
- }
-
- if (FI != -1)
- OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
-
- return OffsetBytes / (getStackWidth(MF) * 4);
-}
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 5d51351a00d2..8e187c7e56c1 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -34,9 +34,6 @@ public:
/// values to the stack.
unsigned getStackWidth(const MachineFunction &MF) const;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
-
bool hasFP(const MachineFunction &MF) const override {
return false;
}
diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
new file mode 100644
index 000000000000..5cb9036f4823
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -0,0 +1,62 @@
+//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines all the static objects used by AMDGPURegisterBankInfo.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+namespace llvm {
+namespace AMDGPU {
+
+enum PartialMappingIdx {
+ None = -1,
+ PM_SGPR32 = 0,
+ PM_SGPR64 = 1,
+ PM_VGPR32 = 2,
+ PM_VGPR64 = 3
+};
+
+const RegisterBankInfo::PartialMapping PartMappings[] {
+ // StartIdx, Length, RegBank
+ {0, 32, SGPRRegBank},
+ {0, 64, SGPRRegBank},
+ {0, 32, VGPRRegBank},
+ {0, 64, VGPRRegBank}
+};
+
+const RegisterBankInfo::ValueMapping ValMappings[] {
+ // SGPR 32-bit
+ {&PartMappings[0], 1},
+ // SGPR 64-bit
+ {&PartMappings[1], 1},
+ // VGPR 32-bit
+ {&PartMappings[2], 1},
+ // VGPR 64-bit
+ {&PartMappings[3], 1}
+};
+
+enum ValueMappingIdx {
+ SGPRStartIdx = 0,
+ VGPRStartIdx = 2
+};
+
+const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
+ unsigned Size) {
+ assert(Size % 32 == 0);
+ unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
+ Idx += (Size / 32) - 1;
+ return &ValMappings[Idx];
+}
+
+} // end namespace AMDGPU
+} // end namespace llvm
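For reference, a minimal usage sketch of the helper above (the call site itself is hypothetical):

  // A 64-bit value on the SGPR bank maps to the second table entry,
  // a 32-bit value on the VGPR bank to the third.
  const RegisterBankInfo::ValueMapping *SM =
      AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);  // &ValMappings[1]
  const RegisterBankInfo::ValueMapping *VM =
      AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);  // &ValMappings[2]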
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5bf347e48650..318de7f2e3d2 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -67,10 +67,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel) {}
+ : SelectionDAGISel(TM, OptLevel) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
+ }
~AMDGPUDAGToDAGISel() override = default;
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -80,6 +83,7 @@ public:
private:
SDValue foldFrameIndex(SDValue N) const;
+ bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
@@ -143,6 +147,8 @@ private:
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+ bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -156,7 +162,15 @@ private:
SDValue &Clamp,
SDValue &Omod) const;
+ bool SelectVOP3OMods(SDValue In, SDValue &Src,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp) const;
+
void SelectADD_SUB_I64(SDNode *N);
+ void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
@@ -187,6 +201,17 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return SelectionDAGISel::runOnMachineFunction(MF);
}
+bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
+ if (TM.Options.NoNaNsFPMath)
+ return true;
+
+ // TODO: Move into isKnownNeverNaN
+ if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N))
+ return BO->Flags.hasNoNaNs();
+
+ return CurDAG->isKnownNeverNaN(N);
+}
+
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
const SIInstrInfo *TII
= static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
@@ -250,7 +275,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
return N;
const SITargetLowering& Lowering =
@@ -290,6 +315,20 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
llvm_unreachable("invalid vector size");
}
+static bool getConstantValue(SDValue N, uint32_t &Out) {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getZExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
+ return true;
+ }
+
+ return false;
+}
+
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -319,6 +358,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectADD_SUB_I64(N);
return;
}
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SelectUADDO_USUBO(N);
+ return;
+ }
case AMDGPUISD::FMUL_W_CHAIN: {
SelectFMUL_W_CHAIN(N);
return;
@@ -336,7 +380,24 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
+
+ if (VT == MVT::v2i16 || VT == MVT::v2f16) {
+ if (Opc == ISD::BUILD_VECTOR) {
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ uint32_t K = LHSVal | (RHSVal << 16);
+ CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
+ CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
+ return;
+ }
+ }
+
+ break;
+ }
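A worked example of the packing above: for a constant BUILD_VECTOR of <2 x half> <1.0, 2.0>,

  // LHSVal = 0x3C00 (half 1.0), RHSVal = 0x4000 (half 2.0)
  // K = 0x3C00 | (0x4000 << 16) = 0x40003C00   ->  a single s_mov_b32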
+
assert(EltVT.bitsEq(MVT::i32));
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
} else {
@@ -502,7 +563,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
- Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+ N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
break;
}
case ISD::AND:
@@ -531,9 +592,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
@@ -689,6 +750,17 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
+void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
+ // The names of these opcodes are misleading: v_add_i32/v_sub_i32 have an
+ // unsigned carry out despite the _i32 suffix. They were renamed to _U32 in VI.
+ // FIXME: We should probably rename the opcodes here.
+ unsigned Opc = N->getOpcode() == ISD::UADDO ?
+ AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
+ { N->getOperand(0), N->getOperand(1) });
+}
+
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
SDLoc SL(N);
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
@@ -1176,16 +1248,6 @@ bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
return true;
}
-///
-/// \param EncodedOffset This is the immediate value that will be encoded
-/// directly into the instruction. On SI/CI the \p EncodedOffset
-/// will be in units of dwords and on VI+ it will be units of bytes.
-static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
- int64_t EncodedOffset) {
- return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
- isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
-}
-
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
@@ -1197,10 +1259,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDLoc SL(ByteOffsetNode);
AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
int64_t ByteOffset = C->getSExtValue();
- int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
- ByteOffset >> 2 : ByteOffset;
+ int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
- if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
+ if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
Imm = true;
return true;
@@ -1481,7 +1542,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUAS::FLAT_ADDRESS) {
+ if (AS == AMDGPUASI.FLAT_ADDRESS) {
SelectCode(N);
return;
}
@@ -1545,7 +1606,6 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
-
Src = In;
if (Src.getOpcode() == ISD::FNEG) {
@@ -1559,10 +1619,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
}
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
-
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ SelectVOP3Mods(In, Src, SrcMods);
+ return isNoNanSrc(Src);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
bool Res = SelectVOP3Mods(In, Src, SrcMods);
@@ -1607,6 +1672,50 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
+ SDValue &Clamp, SDValue &Omod) const {
+ Src = In;
+
+ SDLoc DL(In);
+ // FIXME: Handle Clamp and Omod
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods = 0;
+ Src = In;
+
+ // FIXME: Look for fneg on separate components
+ if (Src.getOpcode() == ISD::FNEG) {
+ Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ Src = Src.getOperand(0);
+ }
+
+ // Packed instructions do not have abs modifiers.
+
+ // FIXME: Handle abs/neg of individual components.
+ // FIXME: Handle swizzling with op_sel
+ Mods |= SISrcMods::OP_SEL_1;
+
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Clamp) const {
+ SDLoc SL(In);
+
+ // FIXME: Handle clamp and op_sel
+ Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
+
+ return SelectVOP3PMods(In, Src, SrcMods);
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 54caa2c5dfad..c0f336e082bd 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUCallLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
@@ -43,6 +44,37 @@ static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
return true;
}
+static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State,
+ const TargetRegisterClass *RC,
+ unsigned NumRegs) {
+ ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
+ unsigned RegResult = State.AllocateReg(RegList);
+ if (RegResult == AMDGPU::NoRegister)
+ return false;
+
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
+ return true;
+}
+
+static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ switch (LocVT.SimpleTy) {
+ case MVT::i64:
+ case MVT::f64:
+ case MVT::v2i32:
+ case MVT::v2f32: {
+ // Up to SGPR0-SGPR39
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::SGPR_64RegClass, 20);
+ }
+ default:
+ return false;
+ }
+}
+
#include "AMDGPUGenCallingConv.inc"
// Find a larger type to do a load / store of a vector with.
@@ -58,6 +90,7 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
@@ -211,10 +244,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// This is totally unsupported, just custom lower to produce an error.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- // We need to custom lower some of the intrinsics
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
-
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
@@ -270,6 +299,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
@@ -460,10 +490,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// N > 4 stores on the same chain.
GatherAllAliasesMaxDepth = 16;
- // FIXME: Need to really handle these.
- MaxStoresPerMemcpy = 4096;
- MaxStoresPerMemmove = 4096;
- MaxStoresPerMemset = 4096;
+ // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
+ // about these during lowering.
+ MaxStoresPerMemcpy = 0xffffffff;
+ MaxStoresPerMemmove = 0xffffffff;
+ MaxStoresPerMemset = 0xffffffff;
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::SHL);
@@ -478,12 +509,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
+ setTargetDAGCombine(ISD::FABS);
}
//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//
+LLVM_READNONE
static bool fnegFoldsIntoOp(unsigned Opc) {
switch (Opc) {
case ISD::FADD:
@@ -491,17 +524,77 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FMUL:
case ISD::FMA:
case ISD::FMAD:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
case ISD::FSIN:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::SIN_HW:
case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::FMIN_LEGACY:
+ case AMDGPUISD::FMAX_LEGACY:
return true;
default:
return false;
}
}
+/// \returns true if the operation will definitely need to use a 64-bit
+/// encoding, and thus will use a VOP3 encoding regardless of the source
+/// modifiers.
+LLVM_READONLY
+static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
+ return N->getNumOperands() > 2 || VT == MVT::f64;
+}
+
+// Most FP instructions support source modifiers, but this could be refined
+// slightly.
+LLVM_READONLY
+static bool hasSourceMods(const SDNode *N) {
+ if (isa<MemSDNode>(N))
+ return false;
+
+ switch (N->getOpcode()) {
+ case ISD::CopyToReg:
+ case ISD::SELECT:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::INLINEASM:
+ case AMDGPUISD::INTERP_P1:
+ case AMDGPUISD::INTERP_P2:
+ case AMDGPUISD::DIV_SCALE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold = 4) {
+ // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
+ // it is truly free to use a source modifier in all cases. If there are
+ // multiple users, and each one would be forced into a VOP3 encoding by the
+ // modifier, there will be a code size increase. Try to avoid increasing code
+ // size unless we know it will save on the instruction count.
+ unsigned NumMayIncreaseSize = 0;
+ MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
+
+ // XXX - Should this limit the number of uses to check?
+ for (const SDNode *U : N->uses()) {
+ if (!hasSourceMods(U))
+ return false;
+
+ if (!opMustUseVOP3Encoding(U, VT)) {
+ if (++NumMayIncreaseSize > CostThreshold)
+ return false;
+ }
+ }
+
+ return true;
+}
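To make the cost model concrete (encoding sizes are the usual 32-bit VOP1/VOP2 versus 64-bit VOP3 forms; the numbers below are only illustrative):

  // CostThreshold = 4:
  //   all users already need VOP3        -> modifiers are free, fold the fneg
  //   5 users that could stay VOP1/VOP2  -> ~5 * 4 bytes of growth against one
  //                                         removed instruction, so don't fold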
+
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
return MVT::i32;
}
@@ -580,12 +673,17 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
assert(VT.isFloatingPoint());
- return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() &&
- VT == MVT::f16);
+
+ // Packed operations do not have a fabs modifier.
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ (Subtarget->has16BitInsts() && VT == MVT::f16);
}
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
- return isFAbsFree(VT);
+ assert(VT.isFloatingPoint());
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ (Subtarget->has16BitInsts() && VT == MVT::f16) ||
+ (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16);
}
bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
@@ -667,6 +765,11 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
+CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) const {
+ return CC_AMDGPU;
+}
+
/// The SelectionDAGBuilder will automatically promote function arguments
/// with illegal types. However, this does not work for the AMDGPU targets
/// since the function arguments are stored in memory as these illegal types.
@@ -764,11 +867,6 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
}
}
-void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const {
- State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
-}
-
void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
const SmallVectorImpl<ISD::OutputArg> &Outs) const {
@@ -788,6 +886,24 @@ AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Target specific lowering
//===---------------------------------------------------------------------===//
+/// Selects the correct CCAssignFn for a given CallingConvention value.
+CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return CC_AMDGPU_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ return CC_AMDGPU;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
+}
+
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SDValue Callee = CLI.Callee;
@@ -829,14 +945,13 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
- Op->dump(&DAG);
+ Op->print(errs(), &DAG);
llvm_unreachable("Custom lowering code for this"
"instruction is not implemented yet!");
break;
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
case ISD::FREM: return LowerFREM(Op, DAG);
@@ -892,19 +1007,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
- switch (G->getAddressSpace()) {
- case AMDGPUAS::LOCAL_ADDRESS: {
+ if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
// TODO: We could emit code to handle the initialization somewhere.
- if (hasDefinedInitializer(GV))
- break;
-
- unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
- return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
- }
+ if (!hasDefinedInitializer(GV)) {
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
+ return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
+ }
}
const Function &Fn = *DAG.getMachineFunction().getFunction();
@@ -936,41 +1048,12 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
}
-SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- switch (IntrinsicID) {
- default: return Op;
- case AMDGPUIntrinsic::AMDGPU_clamp: // Legacy name.
- return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_bfe_i32:
- return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_bfe_u32:
- return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
- }
-}
-
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(const SDLoc &DL, EVT VT,
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
SDValue LHS, SDValue RHS,
SDValue True, SDValue False,
SDValue CC,
DAGCombinerInfo &DCI) const {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return SDValue();
-
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
@@ -1228,7 +1311,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FMAD, DL, FltVT, fqneg, fb, fa);
+ unsigned OpCode = Subtarget->hasFP32Denormals() ?
+ (unsigned)AMDGPUISD::FMAD_FTZ :
+ (unsigned)ISD::FMAD;
+ SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
@@ -1662,32 +1748,37 @@ SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) con
}
// XXX - May require not supporting f32 denormals?
-SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const {
+
+// Don't handle v2f16. The extra instructions to scalarize and repack around the
+// compare and vselect end up producing worse code than scalarizing the whole
+// operation.
+SDValue AMDGPUTargetLowering::LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
+ EVT VT = Op.getValueType();
- SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+ SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X);
// TODO: Should this propagate fast-math-flags?
- SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
+ SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T);
- SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
+ SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);
- const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
- const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
+ const SDValue One = DAG.getConstantFP(1.0, SL, VT);
+ const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
- SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
+ SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X);
EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
- SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
+ SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero);
- return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
+ return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
}
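// A scalar sketch of the rounding recipe LowerFROUND32_16 builds above from
// FTRUNC/FSUB/FABS/FCOPYSIGN/SETCC/SELECT/FADD nodes, i.e. round half away
// from zero. Illustration only; the helper name is hypothetical and not part
// of this patch.
#include <cmath>

static float referenceRound(float X) {
  float T = std::trunc(X);                       // FTRUNC
  float AbsDiff = std::fabs(X - T);              // FSUB + FABS
  float SignOne = std::copysign(1.0f, X);        // FCOPYSIGN(One, X)
  float Sel = AbsDiff >= 0.5f ? SignOne : 0.0f;  // SETOGE + SELECT
  return T + Sel;                                // FADD
}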
SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
@@ -1750,8 +1841,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const
SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (VT == MVT::f32)
- return LowerFROUND32(Op, DAG);
+ if (VT == MVT::f32 || VT == MVT::f16)
+ return LowerFROUND32_16(Op, DAG);
if (VT == MVT::f64)
return LowerFROUND64(Op, DAG);
@@ -2030,15 +2121,19 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
}
SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue N0 = Op.getOperand(0);
+
+ // Convert to target node to get known bits
+ if (N0.getValueType() == MVT::f32)
+ return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
if (getTargetMachine().Options.UnsafeFPMath) {
// There is a generic expand for FP_TO_FP16 with unsafe fast math.
return SDValue();
}
- SDLoc DL(Op);
- SDValue N0 = Op.getOperand(0);
- assert (N0.getSimpleValueType() == MVT::f64);
+ assert(N0.getSimpleValueType() == MVT::f64);
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
const unsigned ExpMask = 0x7ff;
@@ -2379,6 +2474,28 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SN->getBasePtr(), SN->getMemOperand());
}
+SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+ if (!CSrc)
+ return SDValue();
+
+ const APFloat &F = CSrc->getValueAPF();
+ APFloat Zero = APFloat::getZero(F.getSemantics());
+ APFloat::cmpResult Cmp0 = F.compare(Zero);
+ if (Cmp0 == APFloat::cmpLessThan ||
+ (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+ return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
+ }
+
+ APFloat One(F.getSemantics(), "1.0");
+ APFloat::cmpResult Cmp1 = F.compare(One);
+ if (Cmp1 == APFloat::cmpGreaterThan)
+ return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
+
+ return SDValue(CSrc, 0);
+}
+
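// A scalar reference for the AMDGPUISD::CLAMP semantics that the constant
// fold above mirrors: the result is clamped to [0.0, 1.0], and a NaN input
// becomes 0.0 only when DX10 clamp is enabled (otherwise the fold keeps the
// NaN source). Illustration only; the helper name is hypothetical and not
// part of this patch.
#include <algorithm>
#include <cmath>

static float referenceClamp(float X, bool DX10Clamp) {
  if (std::isnan(X))
    return DX10Clamp ? 0.0f : X;
  return std::min(std::max(X, 0.0f), 1.0f);
}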
/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
/// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
@@ -2821,20 +2938,41 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
}
- }
- if (VT == MVT::f32 && Cond.hasOneUse()) {
- SDValue MinMax
- = CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
- // Revisit this node so we can catch min3/max3/med3 patterns.
- //DCI.AddToWorklist(MinMax.getNode());
- return MinMax;
+ if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
+ SDValue MinMax
+ = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+ // Revisit this node so we can catch min3/max3/med3 patterns.
+ //DCI.AddToWorklist(MinMax.getNode());
+ return MinMax;
+ }
}
// There's no reason to not do this if the condition has other uses.
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
}
+static bool isConstantFPZero(SDValue N) {
+ if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
+ return C->isZero() && !C->isNegative();
+ return false;
+}
+
+static unsigned inverseMinMax(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FMAXNUM:
+ return ISD::FMINNUM;
+ case ISD::FMINNUM:
+ return ISD::FMAXNUM;
+ case AMDGPUISD::FMAX_LEGACY:
+ return AMDGPUISD::FMIN_LEGACY;
+ case AMDGPUISD::FMIN_LEGACY:
+ return AMDGPUISD::FMAX_LEGACY;
+ default:
+ llvm_unreachable("invalid min/max opcode");
+ }
+}
+
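// The FMAXNUM/FMINNUM case of performFNegCombine below relies on the identity
// -max(x, y) == min(-x, -y), and symmetrically for min. A small self-check for
// ordinary, non-NaN inputs; illustration only, the helper name is hypothetical
// and not part of this patch.
#include <algorithm>
#include <cassert>

static void checkFNegMinMaxIdentity(float X, float Y) {
  assert(-std::max(X, Y) == std::min(-X, -Y));
  assert(-std::min(X, Y) == std::max(-X, -Y));
}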
SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -2847,10 +2985,16 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
// the other uses cannot, give up. This both prevents unprofitable
// transformations and infinite loops: we won't repeatedly try to fold around
// a negate that has no 'good' form.
- //
- // TODO: Check users can fold
- if (fnegFoldsIntoOp(Opc) && !N0.hasOneUse())
- return SDValue();
+ if (N0.hasOneUse()) {
+ // This may be able to fold into the source, but at a code size cost. Don't
+ // fold if the fold into the user is free.
+ if (allUsesHaveSourceMods(N, 0))
+ return SDValue();
+ } else {
+ if (fnegFoldsIntoOp(Opc) &&
+ (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode())))
+ return SDValue();
+ }
SDLoc SL(N);
switch (Opc) {
@@ -2872,7 +3016,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
else
RHS = RHS.getOperand(0);
- SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS);
+ SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -2891,7 +3035,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
else
RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
- SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS);
+ SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -2923,10 +3067,40 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case AMDGPUISD::FMAX_LEGACY:
+ case AMDGPUISD::FMIN_LEGACY: {
+ // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
+ // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
+ // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
+ // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
+
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+
+ // 0 doesn't have a negated inline immediate.
+ // TODO: Shouldn't fold 1/2pi either, and should be generalized to other
+ // operations.
+ if (isConstantFPZero(RHS))
+ return SDValue();
+
+ SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+ unsigned Opposite = inverseMinMax(Opc);
+
+ SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
+ if (!N0.hasOneUse())
+ DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
+ return Res;
+ }
case ISD::FP_EXTEND:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT: // XXX - Should fround be handled?
+ case ISD::FSIN:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
- case ISD::FSIN:
case AMDGPUISD::SIN_HW: {
SDValue CvtSrc = N0.getOperand(0);
if (CvtSrc.getOpcode() == ISD::FNEG) {
@@ -2941,7 +3115,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
// (fneg (fp_extend x)) -> (fp_extend (fneg x))
// (fneg (rcp x)) -> (rcp (fneg x))
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
- return DAG.getNode(Opc, SL, VT, Neg);
+ return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags());
}
case ISD::FP_ROUND: {
SDValue CvtSrc = N0.getOperand(0);
@@ -2959,6 +3133,45 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
}
+ case ISD::FP16_TO_FP: {
+ // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
+ // f16, but legalization of f16 fneg ends up pulling it out of the source.
+ // Put the fneg back as a legal source operation that can be matched later.
+ SDLoc SL(N);
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
+ SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
+ DAG.getConstant(0x8000, SL, SrcVT));
+ return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
+ }
+ default:
+ return SDValue();
+ }
+}
+
+SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ switch (N0.getOpcode()) {
+ case ISD::FP16_TO_FP: {
+ assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
+ SDLoc SL(N);
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
+ SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
+ DAG.getConstant(0x7fff, SL, SrcVT));
+ return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
+ }
default:
return SDValue();
}
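// Both FP16_TO_FP combines above operate on the raw IEEE-754 binary16 bits,
// where bit 15 is the sign: fneg is an xor with 0x8000 and fabs is an and
// with 0x7fff. Illustration only; the helper names are hypothetical and not
// part of this patch.
#include <cstdint>

static uint16_t fnegF16Bits(uint16_t Bits) {
  return static_cast<uint16_t>(Bits ^ 0x8000u); // flip the sign bit
}

static uint16_t fabsF16Bits(uint16_t Bits) {
  return static_cast<uint16_t>(Bits & 0x7fffu); // clear the sign bit
}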
@@ -3071,6 +3284,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performSelectCombine(N, DCI);
case ISD::FNEG:
return performFNegCombine(N, DCI);
+ case ISD::FABS:
+ return performFAbsCombine(N, DCI);
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
@@ -3159,6 +3374,18 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performLoadCombine(N, DCI);
case ISD::STORE:
return performStoreCombine(N, DCI);
+ case AMDGPUISD::CLAMP:
+ return performClampCombine(N, DCI);
+ case AMDGPUISD::RCP: {
+ if (const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
+ // XXX - Should this flush denormals?
+ const APFloat &Val = CFP->getValueAPF();
+ APFloat One(Val.getSemantics(), "1.0");
+ return DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
+ }
+
+ break;
+ }
}
return SDValue();
}
@@ -3201,13 +3428,17 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((AMDGPUISD::NodeType)Opcode) {
case AMDGPUISD::FIRST_NUMBER: break;
// AMDIL DAG nodes
- NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
+ NODE_NAME_CASE(IF)
+ NODE_NAME_CASE(ELSE)
+ NODE_NAME_CASE(LOOP)
+ NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
- NODE_NAME_CASE(RETURN)
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(SETCC)
@@ -3232,6 +3463,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
NODE_NAME_CASE(DIV_FIXUP)
+ NODE_NAME_CASE(FMAD_FTZ)
NODE_NAME_CASE(TRIG_PREOP)
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
@@ -3265,7 +3497,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
- NODE_NAME_CASE(LOAD_INPUT)
NODE_NAME_CASE(SAMPLE)
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
@@ -3274,6 +3505,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVT_F32_UBYTE1)
NODE_NAME_CASE(CVT_F32_UBYTE2)
NODE_NAME_CASE(CVT_F32_UBYTE3)
+ NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
+ NODE_NAME_CASE(FP_TO_FP16)
+ NODE_NAME_CASE(FP16_ZEXT)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
@@ -3338,13 +3572,11 @@ SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
}
void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
- const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+ const SDValue Op, APInt &KnownZero, APInt &KnownOne,
+ const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+ unsigned BitWidth = KnownZero.getBitWidth();
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
APInt KnownZero2;
APInt KnownOne2;
@@ -3365,21 +3597,27 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
if (!CWidth)
return;
- unsigned BitWidth = 32;
uint32_t Width = CWidth->getZExtValue() & 0x1f;
if (Opc == AMDGPUISD::BFE_U32)
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
+ KnownZero = APInt::getHighBitsSet(32, 32 - Width);
break;
}
+ case AMDGPUISD::FP_TO_FP16:
+ case AMDGPUISD::FP16_ZEXT: {
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ // High bits are zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+ break;
+ }
}
}
unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
- SDValue Op,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ unsigned Depth) const {
switch (Op.getOpcode()) {
case AMDGPUISD::BFE_I32: {
ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -3403,7 +3641,9 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
case AMDGPUISD::CARRY:
case AMDGPUISD::BORROW:
return 31;
-
+ case AMDGPUISD::FP_TO_FP16:
+ case AMDGPUISD::FP16_ZEXT:
+ return 16;
default:
return 1;
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index f6adceac6f11..d6aa0ba92bf7 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -16,6 +16,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
+#include "AMDGPU.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -34,10 +36,10 @@ private:
protected:
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
@@ -47,7 +49,7 @@ protected:
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
@@ -70,6 +72,7 @@ protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
@@ -85,6 +88,7 @@ protected:
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
@@ -111,8 +115,6 @@ protected:
SmallVectorImpl<SDValue> &Results) const;
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- void AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeReturn(CCState &State,
const SmallVectorImpl<ISD::OutputArg> &Outs) const;
@@ -120,7 +122,7 @@ public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
bool mayIgnoreSignedZero(SDValue Op) const {
- if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+ if (getTargetMachine().Options.NoSignedZerosFPMath)
return true;
if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
@@ -158,6 +160,7 @@ public:
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
@@ -174,7 +177,7 @@ public:
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
SDValue CC, DAGCombinerInfo &DCI) const;
@@ -198,10 +201,12 @@ public:
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG,
unsigned Depth = 0) const override;
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
@@ -222,6 +227,10 @@ public:
/// type of implicit parameter.
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
const ImplicitParameter Param) const;
+
+ AMDGPUAS getAMDGPUAS() const {
+ return AMDGPUASI;
+ }
};
namespace AMDGPUISD {
@@ -229,15 +238,34 @@ namespace AMDGPUISD {
enum NodeType : unsigned {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
BRANCH_COND,
// End AMDIL ISD Opcodes
+
+ // Function call.
+ CALL,
+
+ // Masked control flow nodes.
+ IF,
+ ELSE,
+ LOOP,
+
+ // A uniform kernel return that terminates the wavefront.
ENDPGM,
- RETURN,
+
+ // Return to a shader part's epilog code.
+ RETURN_TO_EPILOG,
+
+ // Return with values from a non-entry function.
+ RET_FLAG,
+
DWORDADDR,
FRACT,
+
+ /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
+ /// modifier behavior with dx10_enable.
CLAMP,
+
// This is SETCC with the full mask result which is used for a compare with a
// result bit per item in the wavefront.
SETCC,
@@ -265,6 +293,9 @@ enum NodeType : unsigned {
DIV_SCALE,
DIV_FMAS,
DIV_FIXUP,
+ // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
+ // treated as an illegal operation.
+ FMAD_FTZ,
TRIG_PREOP, // 1 ULP max error for f64
// RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
@@ -301,7 +332,6 @@ enum NodeType : unsigned {
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
- LOAD_INPUT,
SAMPLE,
SAMPLEB,
SAMPLED,
@@ -312,6 +342,18 @@ enum NodeType : unsigned {
CVT_F32_UBYTE1,
CVT_F32_UBYTE2,
CVT_F32_UBYTE3,
+
+  // Convert two f32 values into a single register holding two packed f16
+  // values, using round-to-zero.
+ CVT_PKRTZ_F16_F32,
+
+ // Same as the standard node, except the high bits of the resulting integer
+ // are known 0.
+ FP_TO_FP16,
+
+ // Wrapper around fp16 results that are known to zero the high bits.
+ FP16_ZEXT,
+
/// This node is for VLIW targets and it is used to represent a vector
/// that is stored in consecutive registers with the same channel.
/// For example:
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index e4dc6599e156..a01f5d37c7c1 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
- : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
+ : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {}
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
@@ -86,6 +86,7 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
case AMDGPUSubtarget::SEA_ISLANDS:
return SIEncodingFamily::SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+ case AMDGPUSubtarget::GFX9:
return SIEncodingFamily::VI;
// FIXME: This should never be called for r600 GPUs.
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index bd8e389639f5..12caa5118342 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -16,11 +16,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
+#include "AMDGPU.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#define GET_INSTRINFO_HEADER
-#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
namespace llvm {
@@ -35,6 +35,8 @@ private:
const AMDGPUSubtarget &ST;
virtual void anchor();
+protected:
+ AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index d7fa28bdc001..56f060984f08 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -31,6 +31,10 @@ def AMDGPUFPClassOp : SDTypeProfile<1, 2,
[SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
>;
+def AMDGPUFPPackOp : SDTypeProfile<1, 2,
+ [SDTCisFP<1>, SDTCisSameAs<1, 2>]
+>;
+
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
@@ -42,10 +46,38 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def AMDGPUIfOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
+>;
+
+def AMDGPUElseOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
+>;
+
+def AMDGPULoopOp : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
+>;
+
+def AMDGPUBreakOp : SDTypeProfile<1, 1,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>]
+>;
+
+def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
+>;
+
+def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
+def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
+def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
+def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
+
def AMDGPUconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
SDTCisVT<0, iPTR>]>
@@ -78,6 +110,11 @@ def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
+def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
+def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16", SDTFPToIntOp>;
+def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT", SDTFPToIntOp>;
+
+
def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
// out = max(a, b) a and b are floats, where a nan comparison fails.
@@ -92,17 +129,7 @@ def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
-def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
-
-// out = max(a, b) a and b are signed ints
-def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = max(a, b) a and b are unsigned ints
-def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
// out = min(a, b) a and b are floats, where a nan comparison fails.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
@@ -194,6 +221,8 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
+
// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
@@ -291,15 +320,16 @@ def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
- SDTCisInt<0>, // i8 en
- SDTCisInt<1>, // i1 vm
+ SDTCisInt<0>, // i8 tgt
+ SDTCisInt<1>, // i8 en
+ // i32 or f32 src0
+ SDTCisSameAs<3, 2>, // f32 src1
+ SDTCisSameAs<4, 2>, // f32 src2
+ SDTCisSameAs<5, 2>, // f32 src3
+ SDTCisInt<6>, // i1 compr
// skip done
- SDTCisInt<2>, // i8 tgt
- SDTCisSameAs<3, 1>, // i1 compr
- SDTCisFP<4>, // f32 src0
- SDTCisSameAs<5, 4>, // f32 src1
- SDTCisSameAs<6, 4>, // f32 src2
- SDTCisSameAs<7, 4> // f32 src3
+ SDTCisInt<1> // i1 vm
+
]>;
def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
@@ -333,5 +363,9 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
-def AMDGPUreturn : SDNode<"AMDGPUISD::RETURN", SDTNone,
+def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
new file mode 100644
index 000000000000..8867ed689a31
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -0,0 +1,424 @@
+//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "amdgpu-isel"
+
+using namespace llvm;
+
+AMDGPUInstructionSelector::AMDGPUInstructionSelector(
+ const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
+ : InstructionSelector(), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
+
+MachineOperand
+AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
+ unsigned SubIdx) const {
+
+ MachineInstr *MI = MO.getParent();
+ MachineBasicBlock *BB = MO.getParent()->getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+ if (MO.isReg()) {
+ unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
+ unsigned Reg = MO.getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
+ .addReg(Reg, 0, ComposedSubIdx);
+
+ return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
+ MO.isKill(), MO.isDead(), MO.isUndef(),
+ MO.isEarlyClobber(), 0, MO.isDebug(),
+ MO.isInternalRead());
+ }
+
+ assert(MO.isImm());
+
+ APInt Imm(64, MO.getImm());
+
+ switch (SubIdx) {
+ default:
+    llvm_unreachable("do not know how to split immediate with this sub index.");
+ case AMDGPU::sub0:
+ return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
+ case AMDGPU::sub1:
+ return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
+ }
+}
+
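// getSubOperand64 above splits a 64-bit immediate so that sub0 receives the
// low 32 bits and sub1 the high 32 bits. A plain-integer sketch of that split;
// illustration only, the helper names are hypothetical and not part of this
// patch.
#include <cstdint>

static uint32_t sub0Imm(uint64_t Imm) { return static_cast<uint32_t>(Imm); }       // low half
static uint32_t sub1Imm(uint64_t Imm) { return static_cast<uint32_t>(Imm >> 32); } // high half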
+bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
+ unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ if (Size != 64)
+ return false;
+
+ DebugLoc DL = I.getDebugLoc();
+
+ MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
+ MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
+ .add(Lo1)
+ .add(Lo2);
+
+ MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
+ MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
+ .add(Hi1)
+ .add(Hi2);
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
+ .addReg(DstLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(DstHi)
+ .addImm(AMDGPU::sub1);
+
+ for (MachineOperand &MO : I.explicit_operands()) {
+ if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
+ return selectG_ADD(I);
+}
+
+bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ DebugLoc DL = I.getDebugLoc();
+
+ // FIXME: Select store instruction based on address space
+ MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
+ .add(I.getOperand(1))
+ .add(I.getOperand(0))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+
+ // Now that we selected an opcode, we need to constrain the register
+ // operands to use appropriate classes.
+ bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return Ret;
+}
+
+bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+ if (Size == 32) {
+ I.setDesc(TII.get(AMDGPU::S_MOV_B32));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ assert(Size == 64);
+
+ DebugLoc DL = I.getDebugLoc();
+ unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ const APInt &Imm = I.getOperand(1).getCImm()->getValue();
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
+ .addImm(Imm.trunc(32).getZExtValue());
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
+ .addImm(Imm.ashr(32).getZExtValue());
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(LoReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiReg)
+ .addImm(AMDGPU::sub1);
+ // We can't call constrainSelectedInstRegOperands here, because it doesn't
+  // work for target-independent opcodes.
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+}
+
+static bool isConstant(const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
+ const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
+
+ const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
+
+ assert(PtrMI);
+
+ if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
+ return;
+
+ GEPInfo GEPInfo(*PtrMI);
+
+ for (unsigned i = 1, e = 3; i < e; ++i) {
+ const MachineOperand &GEPOp = PtrMI->getOperand(i);
+ const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
+ assert(OpDef);
+ if (isConstant(*OpDef)) {
+ // FIXME: Is it possible to have multiple Imm parts? Maybe if we
+ // are lacking other optimizations.
+ assert(GEPInfo.Imm == 0);
+ GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
+ continue;
+ }
+ const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
+ if (OpBank->getID() == AMDGPU::SGPRRegBankID)
+ GEPInfo.SgprParts.push_back(GEPOp.getReg());
+ else
+ GEPInfo.VgprParts.push_back(GEPOp.getReg());
+ }
+
+ AddrInfo.push_back(GEPInfo);
+ getAddrModeInfo(*PtrMI, MRI, AddrInfo);
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ const Value *Ptr = MMO->getValue();
+
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
+static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
+
+ if (LoadSize == 32)
+ return BaseOpcode;
+
+ switch (BaseOpcode) {
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ switch (LoadSize) {
+ case 64:
+ return AMDGPU::S_LOAD_DWORDX2_IMM;
+ case 128:
+ return AMDGPU::S_LOAD_DWORDX4_IMM;
+ case 256:
+ return AMDGPU::S_LOAD_DWORDX8_IMM;
+ case 512:
+ return AMDGPU::S_LOAD_DWORDX16_IMM;
+ }
+ break;
+ case AMDGPU::S_LOAD_DWORD_IMM_ci:
+ switch (LoadSize) {
+ case 64:
+ return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
+ case 128:
+ return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
+ case 256:
+ return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
+ case 512:
+ return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
+ }
+ break;
+ case AMDGPU::S_LOAD_DWORD_SGPR:
+ switch (LoadSize) {
+ case 64:
+ return AMDGPU::S_LOAD_DWORDX2_SGPR;
+ case 128:
+ return AMDGPU::S_LOAD_DWORDX4_SGPR;
+ case 256:
+ return AMDGPU::S_LOAD_DWORDX8_SGPR;
+ case 512:
+ return AMDGPU::S_LOAD_DWORDX16_SGPR;
+ }
+ break;
+ }
+ llvm_unreachable("Invalid base smrd opcode or size");
+}
+
+bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
+ for (const GEPInfo &GEPInfo : AddrInfo) {
+ if (!GEPInfo.VgprParts.empty())
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
+ ArrayRef<GEPInfo> AddrInfo) const {
+
+ if (!I.hasOneMemOperand())
+ return false;
+
+ if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+ return false;
+
+ if (!isInstrUniform(I))
+ return false;
+
+ if (hasVgprParts(AddrInfo))
+ return false;
+
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned DstReg = I.getOperand(0).getReg();
+ const DebugLoc &DL = I.getDebugLoc();
+ unsigned Opcode;
+ unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+ if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
+
+ const GEPInfo &GEPInfo = AddrInfo[0];
+
+ unsigned PtrReg = GEPInfo.SgprParts[0];
+ int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
+ if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
+
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addImm(EncodedImm)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+ }
+
+ if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
+ isUInt<32>(EncodedImm)) {
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addImm(EncodedImm)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+ }
+
+ if (isUInt<32>(GEPInfo.Imm)) {
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
+ unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addImm(GEPInfo.Imm);
+
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addReg(OffsetReg)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+ }
+ }
+
+ unsigned PtrReg = I.getOperand(1).getReg();
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addImm(0)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+}
+
+
+bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ DebugLoc DL = I.getDebugLoc();
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned PtrReg = I.getOperand(1).getReg();
+ unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+ unsigned Opcode;
+
+ SmallVector<GEPInfo, 4> AddrInfo;
+
+ getAddrModeInfo(I, MRI, AddrInfo);
+
+ if (selectSMRD(I, AddrInfo)) {
+ I.eraseFromParent();
+ return true;
+ }
+
+ switch (LoadSize) {
+ default:
+ llvm_unreachable("Load size not supported\n");
+ case 32:
+ Opcode = AMDGPU::FLAT_LOAD_DWORD;
+ break;
+ case 64:
+ Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
+ break;
+ }
+
+ MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
+ .add(I.getOperand(0))
+ .addReg(PtrReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+
+ bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+}
+
+bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
+
+ if (!isPreISelGenericOpcode(I.getOpcode()))
+ return true;
+
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ return selectG_ADD(I);
+ case TargetOpcode::G_CONSTANT:
+ return selectG_CONSTANT(I);
+ case TargetOpcode::G_GEP:
+ return selectG_GEP(I);
+ case TargetOpcode::G_LOAD:
+ return selectG_LOAD(I);
+ case TargetOpcode::G_STORE:
+ return selectG_STORE(I);
+ }
+ return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
new file mode 100644
index 000000000000..c87102e55dfb
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -0,0 +1,67 @@
+//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the InstructionSelector class for
+/// AMDGPU.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class AMDGPUInstrInfo;
+class AMDGPURegisterBankInfo;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
+class SIInstrInfo;
+class SIRegisterInfo;
+class SISubtarget;
+
+class AMDGPUInstructionSelector : public InstructionSelector {
+public:
+ AMDGPUInstructionSelector(const SISubtarget &STI,
+ const AMDGPURegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+private:
+ struct GEPInfo {
+ const MachineInstr &GEP;
+ SmallVector<unsigned, 2> SgprParts;
+ SmallVector<unsigned, 2> VgprParts;
+ int64_t Imm;
+ GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
+ };
+
+ MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
+ bool selectG_CONSTANT(MachineInstr &I) const;
+ bool selectG_ADD(MachineInstr &I) const;
+ bool selectG_GEP(MachineInstr &I) const;
+ bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
+ void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
+ SmallVectorImpl<GEPInfo> &AddrInfo) const;
+ bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
+ bool selectG_LOAD(MachineInstr &I) const;
+ bool selectG_STORE(MachineInstr &I) const;
+
+ const SIInstrInfo &TII;
+ const SIRegisterInfo &TRI;
+ const AMDGPURegisterBankInfo &RBI;
+protected:
+ AMDGPUAS AMDGPUASI;
+};
+
+} // End llvm namespace.
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 59cba636c586..b8d681298dee 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -72,6 +72,49 @@ def u8imm : Operand<i8> {
def brtarget : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
+// Misc. PatFrags
+//===----------------------------------------------------------------------===//
+
+class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0),
+ (op $src0),
+ [{ return N->hasOneUse(); }]
+>;
+
+class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0, node:$src1),
+ (op $src0, $src1),
+ [{ return N->hasOneUse(); }]
+>;
+
+class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0, node:$src1, node:$src2),
+ (op $src0, $src1, $src2),
+ [{ return N->hasOneUse(); }]
+>;
+
+def trunc_oneuse : HasOneUseUnaryOp<trunc>;
+
+let Properties = [SDNPCommutative, SDNPAssociative] in {
+def smax_oneuse : HasOneUseBinOp<smax>;
+def smin_oneuse : HasOneUseBinOp<smin>;
+def umax_oneuse : HasOneUseBinOp<umax>;
+def umin_oneuse : HasOneUseBinOp<umin>;
+def fminnum_oneuse : HasOneUseBinOp<fminnum>;
+def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+def and_oneuse : HasOneUseBinOp<and>;
+def or_oneuse : HasOneUseBinOp<or>;
+def xor_oneuse : HasOneUseBinOp<xor>;
+} // Properties = [SDNPCommutative, SDNPAssociative]
+
+def sub_oneuse : HasOneUseBinOp<sub>;
+
+def srl_oneuse : HasOneUseBinOp<srl>;
+def shl_oneuse : HasOneUseBinOp<shl>;
+
+def select_oneuse : HasOneUseTernaryOp<select>;
+
+//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
@@ -157,27 +200,11 @@ def COND_NULL : PatLeaf <
//===----------------------------------------------------------------------===//
-// Misc. PatFrags
-//===----------------------------------------------------------------------===//
-
-class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
- (ops node:$src0, node:$src1),
- (op $src0, $src1),
- [{ return N->hasOneUse(); }]
->;
-
-class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
- (ops node:$src0, node:$src1, node:$src2),
- (op $src0, $src1, $src2),
- [{ return N->hasOneUse(); }]
->;
-
-//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;
class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
@@ -195,7 +222,7 @@ def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;
class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
// Global address space loads
@@ -215,7 +242,7 @@ def global_store_atomic : GlobalStore<atomic_store>;
class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
// Constant address space loads
@@ -226,7 +253,7 @@ class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
def constant_load : ConstantLoad<load>;
class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
// Local address space loads
@@ -239,7 +266,7 @@ class LocalStore <SDPatternOperator op> : LocalMemOp <
>;
class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS;
+    return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;
}]>;
class FlatLoad <SDPatternOperator op> : FlatMemOp <
@@ -321,7 +348,7 @@ def local_store_aligned8bytes : Aligned8Bytes <
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
@@ -339,7 +366,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
@@ -349,7 +376,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i32 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def _64_local : PatFrag<
@@ -357,7 +384,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i64 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
}
@@ -367,17 +394,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
def "" : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def _ret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
@@ -395,22 +422,22 @@ defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
def AMDGPUatomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$value),
(AMDGPUatomic_cmp_swap node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def atomic_cmp_swap_global_ret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
@@ -422,6 +449,7 @@ int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
+int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
@@ -452,7 +480,7 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
- [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+ [(set f32:$dst, (AMDGPUclamp f32:$src0))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
@@ -565,6 +593,12 @@ multiclass BFIPatterns <Instruction BFI_INT,
>;
def : Pat <
+ (f32 (fcopysign f32:$src0, f64:$src1)),
+ (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
+ (i32 (EXTRACT_SUBREG $src1, sub1)))
+ >;
+
+ def : Pat <
(f64 (fcopysign f64:$src0, f64:$src1)),
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
@@ -602,10 +636,22 @@ def IMMPopCount : SDNodeXForm<imm, [{
MVT::i32);
}]>;
-class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
- (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
- (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
->;
+multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
+ def : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
+ >;
+
+ def : Pat <
+ (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (UBFE $src, (i32 0), $width)
+ >;
+
+ def : Pat <
+ (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (SBFE $src, (i32 0), $width)
+ >;
+}
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
@@ -618,23 +664,13 @@ class ROTRPattern <Instruction BIT_ALIGN> : Pat <
class IntMed3Pat<Instruction med3Inst,
SDPatternOperator max,
SDPatternOperator max_oneuse,
- SDPatternOperator min_oneuse> : Pat<
- (max (min_oneuse i32:$src0, i32:$src1),
- (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
+ SDPatternOperator min_oneuse,
+ ValueType vt = i32> : Pat<
+ (max (min_oneuse vt:$src0, vt:$src1),
+ (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst $src0, $src1, $src2)
>;
-let Properties = [SDNPCommutative, SDNPAssociative] in {
-def smax_oneuse : HasOneUseBinOp<smax>;
-def smin_oneuse : HasOneUseBinOp<smin>;
-def umax_oneuse : HasOneUseBinOp<umax>;
-def umin_oneuse : HasOneUseBinOp<umin>;
-} // Properties = [SDNPCommutative, SDNPAssociative]
-
-def sub_oneuse : HasOneUseBinOp<sub>;
-
-def select_oneuse : HasOneUseTernaryOp<select>;
-
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
index 8e3471bd2083..86dc9bd9ea74 100644
--- a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -54,14 +54,7 @@ std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
FunctionType *AMDGPUIntrinsicInfo::getType(LLVMContext &Context, unsigned ID,
ArrayRef<Type*> Tys) const {
// FIXME: Re-use Intrinsic::getType machinery
- switch (ID) {
- case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
- Type *F32Ty = Type::getFloatTy(Context);
- return FunctionType::get(F32Ty, { F32Ty, F32Ty }, false);
- }
- default:
- llvm_unreachable("unhandled intrinsic");
- }
+ llvm_unreachable("unhandled intrinsic");
}
unsigned AMDGPUIntrinsicInfo::lookupName(const char *NameData,
@@ -97,8 +90,8 @@ Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Function *F
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
- AttributeSet AS = getAttributes(M->getContext(),
- static_cast<AMDGPUIntrinsic::ID>(IntrID));
+ AttributeList AS =
+ getAttributes(M->getContext(), static_cast<AMDGPUIntrinsic::ID>(IntrID));
F->setAttributes(AS);
return F;
}
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index ceae0b575395..18c9bd933af2 100644
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -12,25 +12,8 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
-
- // Deprecated in favor of llvm.amdgcn.sffbh
- def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- // Deprecated in favor of separate int_amdgcn_cube* intrinsics.
- def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-
- // Deprecated in favor of expanded bit operations
- def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- // Deprecated in favor of llvm.amdgcn.rsq
- def int_AMDGPU_rsq : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
- >;
}
include "SIIntrinsics.td"
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
new file mode 100644
index 000000000000..a2567a549028
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -0,0 +1,62 @@
+//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPULegalizerInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
+ using namespace TargetOpcode;
+
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P1 = LLT::pointer(1, 64);
+ const LLT P2 = LLT::pointer(2, 64);
+
+ setAction({G_CONSTANT, S64}, Legal);
+
+ setAction({G_GEP, P1}, Legal);
+ setAction({G_GEP, P2}, Legal);
+ setAction({G_GEP, 1, S64}, Legal);
+
+ setAction({G_LOAD, P1}, Legal);
+ setAction({G_LOAD, P2}, Legal);
+ setAction({G_LOAD, S32}, Legal);
+ setAction({G_LOAD, 1, P1}, Legal);
+ setAction({G_LOAD, 1, P2}, Legal);
+
+ setAction({G_STORE, S32}, Legal);
+ setAction({G_STORE, 1, P1}, Legal);
+
+ // FIXME: When RegBankSelect inserts copies, it will only create new
+ // registers with scalar types. This means we can end up with
+  // G_LOAD/G_STORE/G_GEP instructions with scalar types for their pointer
+ // operands. In assert builds, the instruction selector will assert
+ // if it sees a generic instruction which isn't legal, so we need to
+  // tell it that scalar types are legal for pointer operands.
+ setAction({G_GEP, S64}, Legal);
+ setAction({G_LOAD, 1, S64}, Legal);
+ setAction({G_STORE, 1, S64}, Legal);
+
+ computeTables();
+}
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
new file mode 100644
index 000000000000..291e3361f163
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -0,0 +1,30 @@
+//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+/// This class provides the legalization rules for the AMDGPU target.
+class AMDGPULegalizerInfo : public LegalizerInfo {
+public:
+ AMDGPULegalizerInfo();
+};
+} // End llvm namespace.
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
new file mode 100644
index 000000000000..dcb6670621ee
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -0,0 +1,160 @@
+//===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+#define DEBUG_TYPE "amdgpu-lower-intrinsics"
+
+using namespace llvm;
+
+namespace {
+
+const unsigned MaxStaticSize = 1024;
+
+class AMDGPULowerIntrinsics : public ModulePass {
+private:
+ const TargetMachine *TM;
+
+ bool makeLIDRangeMetadata(Function &F) const;
+
+public:
+ static char ID;
+
+ AMDGPULowerIntrinsics(const TargetMachine *TM = nullptr)
+ : ModulePass(ID), TM(TM) { }
+ bool runOnModule(Module &M) override;
+ StringRef getPassName() const override {
+ return "AMDGPU Lower Intrinsics";
+ }
+};
+
+}
+
+char AMDGPULowerIntrinsics::ID = 0;
+
+char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID;
+
+INITIALIZE_TM_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE,
+ "Lower intrinsics", false, false)
+
+// TODO: Should refine based on estimated number of accesses (e.g. does it
+// require splitting based on alignment)
+static bool shouldExpandOperationWithSize(Value *Size) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Size);
+ return !CI || (CI->getZExtValue() > MaxStaticSize);
+}
+
+static bool expandMemIntrinsicUses(Function &F) {
+ Intrinsic::ID ID = F.getIntrinsicID();
+ bool Changed = false;
+
+ for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
+ Instruction *Inst = cast<Instruction>(*I);
+ ++I;
+
+ switch (ID) {
+ case Intrinsic::memcpy: {
+ auto *Memcpy = cast<MemCpyInst>(Inst);
+ if (shouldExpandOperationWithSize(Memcpy->getLength())) {
+ expandMemCpyAsLoop(Memcpy);
+ Changed = true;
+ Memcpy->eraseFromParent();
+ }
+
+ break;
+ }
+ case Intrinsic::memmove: {
+ auto *Memmove = cast<MemMoveInst>(Inst);
+ if (shouldExpandOperationWithSize(Memmove->getLength())) {
+ expandMemMoveAsLoop(Memmove);
+ Changed = true;
+ Memmove->eraseFromParent();
+ }
+
+ break;
+ }
+ case Intrinsic::memset: {
+ auto *Memset = cast<MemSetInst>(Inst);
+ if (shouldExpandOperationWithSize(Memset->getLength())) {
+ expandMemSetAsLoop(Memset);
+ Changed = true;
+ Memset->eraseFromParent();
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
+ if (!TM)
+ return false;
+
+ bool Changed = false;
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+
+ for (auto *U : F.users()) {
+ auto *CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ Changed |= ST.makeLIDRangeMetadata(CI);
+ }
+ return Changed;
+}
+
+bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
+ bool Changed = false;
+
+ for (Function &F : M) {
+ if (!F.isDeclaration())
+ continue;
+
+ switch (F.getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ if (expandMemIntrinsicUses(F))
+ Changed = true;
+ break;
+
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::r600_read_local_size_x:
+ case Intrinsic::r600_read_local_size_y:
+ case Intrinsic::r600_read_local_size_z:
+ Changed |= makeLIDRangeMetadata(F);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createAMDGPULowerIntrinsicsPass(const TargetMachine *TM) {
+ return new AMDGPULowerIntrinsics(TM);
+}
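
The new pass only expands memcpy/memmove/memset calls whose length is non-constant or larger than MaxStaticSize; small constant-size calls are left alone. Conceptually, expandMemCpyAsLoop replaces the call with an explicit copy loop. A rough scalar sketch of both decisions (ordinary C++, not the IRBuilder code the pass actually runs):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Mirror of shouldExpandOperationWithSize(): expand when the length is not a
// compile-time constant or exceeds the 1024-byte MaxStaticSize threshold.
static bool shouldExpand(bool LenIsConstant, uint64_t Len) {
  const uint64_t MaxStaticSize = 1024;
  return !LenIsConstant || Len > MaxStaticSize;
}

// Rough scalar analogue of what expandMemCpyAsLoop produces: the intrinsic
// call is replaced by an explicit copy loop. The real pass builds IR with
// IRBuilder and may copy wider elements; this is only an illustration.
static void memcpyAsLoop(uint8_t *Dst, const uint8_t *Src, size_t Len) {
  for (size_t I = 0; I != Len; ++I)
    Dst[I] = Src[I];
}

int main() {
  assert(!shouldExpand(/*LenIsConstant=*/true, 8));   // small constant: keep
  assert(shouldExpand(/*LenIsConstant=*/false, 0));   // unknown length: expand
  assert(shouldExpand(/*LenIsConstant=*/true, 4096)); // large constant: expand

  uint8_t Src[8] = {1, 2, 3, 4, 5, 6, 7, 8}, Dst[8] = {};
  memcpyAsLoop(Dst, Src, sizeof(Src));
  assert(Dst[0] == 1 && Dst[7] == 8);
}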
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 7d56355074b1..14ee1c81f8fa 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -151,6 +151,28 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
return MCInstLowering.lowerOperand(MO, MCOp);
}
+const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
+  // TargetMachine does not support LLVM-style casts (cast<> / dyn_cast<>), so
+  // use a C++-style static_cast. This is safe since TM is always an
+  // AMDGPUTargetMachine or a class derived from it.
+ auto *AT = static_cast<AMDGPUTargetMachine*>(&TM);
+ auto *CE = dyn_cast<ConstantExpr>(CV);
+
+ // Lower null pointers in private and local address space.
+ // Clang generates addrspacecast for null pointers in private and local
+ // address space, which needs to be lowered.
+ if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
+ auto Op = CE->getOperand(0);
+ auto SrcAddr = Op->getType()->getPointerAddressSpace();
+ if (Op->isNullValue() && AT->getNullPointerValue(SrcAddr) == 0) {
+ auto DstAddr = CE->getType()->getPointerAddressSpace();
+ return MCConstantExpr::create(AT->getNullPointerValue(DstAddr),
+ OutContext);
+ }
+ }
+ return AsmPrinter::lowerConstant(CV);
+}
+
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
@@ -162,7 +184,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
C.emitError("Illegal instruction detected: " + Err);
- MI->dump();
+ MI->print(errs());
}
if (MI->isBundle()) {
@@ -173,8 +195,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
++I;
}
} else {
- // We don't want SI_MASK_BRANCH/SI_RETURN encoded. They are placeholder
- // terminator instructions and should only be printed as comments.
+ // We don't want SI_MASK_BRANCH/SI_RETURN_TO_EPILOG encoded. They are
+ // placeholder terminator instructions and should only be printed as
+ // comments.
if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
if (isVerbose()) {
SmallVector<char, 16> BBStr;
@@ -190,9 +213,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- if (MI->getOpcode() == AMDGPU::SI_RETURN) {
+ if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
if (isVerbose())
- OutStreamer->emitRawComment(" return");
+ OutStreamer->emitRawComment(" return to shader part epilog");
return;
}
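
The lowerConstant() override above only kicks in for an addrspacecast of a null pointer whose source address space already uses 0 as its null value; it then emits the destination address space's null value instead of a literal 0. A condensed sketch of that decision follows; the address-space numbers and the all-ones null value for local/private are assumptions of this sketch, not something the hunk itself states:

#include <cassert>
#include <cstdint>

// Illustrative per-address-space null pointer values. That generic/global use
// 0 while local/private use all-ones (and the 3/5 numbering) is an assumption
// of this sketch matching the usual AMDGPU convention.
static uint64_t nullPointerValue(unsigned AddrSpace) {
  switch (AddrSpace) {
  case 3:  // local
  case 5:  // private
    return 0xffffffffu;
  default: // generic, global, constant, ...
    return 0;
  }
}

// Mirrors the lowerConstant() check: an addrspacecast of a null pointer whose
// source address space uses 0 as null is emitted as the destination address
// space's null value; otherwise fall back to the default AsmPrinter lowering.
static bool lowerNullAddrSpaceCast(bool OperandIsNull, unsigned SrcAS,
                                   unsigned DstAS, uint64_t &EmittedValue) {
  if (!OperandIsNull || nullPointerValue(SrcAS) != 0)
    return false;
  EmittedValue = nullPointerValue(DstAS);
  return true;
}

int main() {
  uint64_t V = 0;
  // Null in the generic address space cast to private: emit the private null.
  assert(lowerNullAddrSpaceCast(true, /*SrcAS=*/0, /*DstAS=*/5, V));
  assert(V == 0xffffffffu);
}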
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 40c3327a98db..27fe639e3d4b 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -12,6 +12,20 @@
using namespace llvm;
+static bool isEntryFunctionCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ return true;
+ default:
+ return false;
+ }
+}
+
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
LocalMemoryObjects(),
@@ -19,8 +33,8 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
- IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
- MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL) {
+ IsEntryFunction(isEntryFunctionCC(MF.getFunction()->getCallingConv())),
+ NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
// except reserved size is not correctly aligned.
}
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 5d0640b816f3..8bfeb67ad4ec 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -30,7 +30,11 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
/// Start of implicit kernel args
unsigned ABIArgOffset;
- bool IsKernel;
+  // Kernels + shaders, i.e. functions called by the driver and not called
+  // by other functions.
+ bool IsEntryFunction;
+
+ bool NoSignedZerosFPMath;
public:
AMDGPUMachineFunction(const MachineFunction &MF);
@@ -66,8 +70,12 @@ public:
return LDSSize;
}
- bool isKernel() const {
- return IsKernel;
+ bool isEntryFunction() const {
+ return IsEntryFunction;
+ }
+
+ bool hasNoSignedZerosFPMath() const {
+ return NoSignedZerosFPMath;
}
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
diff --git a/lib/Target/AMDGPU/AMDGPUPTNote.h b/lib/Target/AMDGPU/AMDGPUPTNote.h
index 947d45b66969..71b9ab699b96 100644
--- a/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -19,12 +19,13 @@
namespace AMDGPU {
-namespace PT_NOTE {
+namespace ElfNote {
const char SectionName[] = ".note";
const char NoteName[] = "AMD";
+// TODO: Move this enum to include/llvm/Support so it can be used in tools?
enum NoteType{
NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
NT_AMDGPU_HSA_HSAIL = 2,
@@ -32,7 +33,7 @@ enum NoteType{
NT_AMDGPU_HSA_PRODUCER = 4,
NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
NT_AMDGPU_HSA_EXTENSION = 6,
- NT_AMDGPU_HSA_RUNTIME_METADATA = 7,
+ NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10,
NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
};
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index baa28de7a770..4fb262c6277c 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -14,12 +14,49 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <tuple>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -31,16 +68,16 @@ namespace {
class AMDGPUPromoteAlloca : public FunctionPass {
private:
const TargetMachine *TM;
- Module *Mod;
- const DataLayout *DL;
- MDNode *MaxWorkGroupSizeRange;
+ Module *Mod = nullptr;
+ const DataLayout *DL = nullptr;
+ AMDGPUAS AS;
// FIXME: This should be per-kernel.
- uint32_t LocalMemLimit;
- uint32_t CurrentLocalMemUsage;
+ uint32_t LocalMemLimit = 0;
+ uint32_t CurrentLocalMemUsage = 0;
- bool IsAMDGCN;
- bool IsAMDHSA;
+ bool IsAMDGCN = false;
+ bool IsAMDHSA = false;
std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
@@ -63,15 +100,7 @@ public:
static char ID;
AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) :
- FunctionPass(ID),
- TM(TM_),
- Mod(nullptr),
- DL(nullptr),
- MaxWorkGroupSizeRange(nullptr),
- LocalMemLimit(0),
- CurrentLocalMemUsage(0),
- IsAMDGCN(false),
- IsAMDHSA(false) { }
+ FunctionPass(ID), TM(TM_) {}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@@ -86,7 +115,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
char AMDGPUPromoteAlloca::ID = 0;
@@ -95,7 +124,6 @@ INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
-
bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
if (!TM)
return false;
@@ -103,13 +131,6 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
Mod = &M;
DL = &Mod->getDataLayout();
- // The maximum workitem id.
- //
- // FIXME: Should get as subtarget property. Usually runtime enforced max is
- // 256.
- MDBuilder MDB(Mod->getContext());
- MaxWorkGroupSizeRange = MDB.createRange(APInt(32, 0), APInt(32, 2048));
-
const Triple &TT = TM->getTargetTriple();
IsAMDGCN = TT.getArch() == Triple::amdgcn;
@@ -125,6 +146,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
if (!ST.isPromoteAllocaEnabled())
return false;
+ AS = AMDGPU::getAMDGPUAS(*F.getParent());
FunctionType *FTy = F.getFunctionType();
@@ -133,7 +155,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// we cannot use local memory in the pass.
for (Type *ParamTy : FTy->params()) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
- if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
LocalMemLimit = 0;
DEBUG(dbgs() << "Function has local memory argument. Promoting to "
"local memory disabled.\n");
@@ -150,7 +172,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
@@ -175,7 +197,8 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
}
}
- unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage);
+ unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
+ F);
// Restrict local memory usage so that we don't drastically reduce occupancy,
// unless it is already significantly reduced.
@@ -196,7 +219,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// Round up to the next tier of usage.
unsigned MaxSizeWithWaveCount
- = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy);
+ = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
// Program is possibly broken by using more local mem than available.
if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
@@ -226,6 +249,9 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
+ *Builder.GetInsertBlock()->getParent());
+
if (!IsAMDHSA) {
Function *LocalSizeYFn
= Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
@@ -235,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
- LocalSizeY->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
- LocalSizeZ->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ ST.makeLIDRangeMetadata(LocalSizeY);
+ ST.makeLIDRangeMetadata(LocalSizeZ);
return std::make_pair(LocalSizeY, LocalSizeZ);
}
@@ -279,15 +305,15 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
= Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
- DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
- DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
// Size of the dispatch packet struct.
- DispatchPtr->addDereferenceableAttr(AttributeSet::ReturnIndex, 64);
+ DispatchPtr->addDereferenceableAttr(AttributeList::ReturnIndex, 64);
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
- DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
+ DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
@@ -298,10 +324,10 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
- MDNode *MD = llvm::MDNode::get(Mod->getContext(), None);
+ MDNode *MD = MDNode::get(Mod->getContext(), None);
LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
- LoadZU->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ ST.makeLIDRangeMetadata(LoadZU);
// Extract y component. Upper half of LoadZU should be zero already.
Value *Y = Builder.CreateLShr(LoadXY, 16);
@@ -310,6 +336,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
}
Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
+ *Builder.GetInsertBlock()->getParent());
Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
switch (N) {
@@ -332,7 +360,7 @@ Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
CallInst *CI = Builder.CreateCall(WorkitemIdFn);
- CI->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ ST.makeLIDRangeMetadata(CI);
return CI;
}
@@ -383,7 +411,7 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
}
}
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@@ -438,7 +466,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(0);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -450,7 +478,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
}
case Instruction::Store: {
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(1);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -580,6 +608,9 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
}
if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
+ // Give up if the pointer may be captured.
+ if (PointerMayBeCaptured(UseInst, true, true))
+ return false;
// Don't collect the users of this.
WorkList.push_back(User);
continue;
@@ -640,7 +671,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I)) {
+ if (tryPromoteAllocaToVector(&I, AS)) {
DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
return;
}
@@ -655,8 +686,6 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
const AMDGPUSubtarget &ST =
TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
- // FIXME: We should also try to get this value from the reqd_work_group_size
- // function attribute if it is available.
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
const DataLayout &DL = Mod->getDataLayout();
@@ -701,7 +730,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
Twine(F->getName()) + Twine('.') + I.getName(),
nullptr,
GlobalVariable::NotThreadLocal,
- AMDGPUAS::LOCAL_ADDRESS);
+ AS.LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
GV->setAlignment(I.getAlignment());
@@ -734,7 +763,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
Value *Src0 = CI->getOperand(0);
Type *EltTy = Src0->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
@@ -751,7 +780,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
continue;
Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
@@ -819,17 +848,17 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
Type *SrcTy = Src->getType()->getPointerElementType();
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
Intrinsic::objectsize,
- { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
+ { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
);
- CallInst *NewCall
- = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
+ CallInst *NewCall = Builder.CreateCall(
+ ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
Intr->replaceAllUsesWith(NewCall);
Intr->eraseFromParent();
continue;
}
default:
- Intr->dump();
+ Intr->print(errs());
llvm_unreachable("Don't know how to promote alloca intrinsic use.");
}
}
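
tryPromoteAllocaToVector() rewrites indexed loads and stores through a small private array into extract/insert operations on one vector value, so the array never has to be materialized in scratch memory. A scalar-level analogue of the before/after shape (hypothetical code, not the pass's actual IR rewriting):

#include <cassert>
#include <cstddef>

// Before: a small private array; every Tmp[K] access is a load or store
// through a GEP into the alloca, which would otherwise live in scratch.
static int sumBefore(const int (&In)[4], size_t I, size_t J) {
  int Tmp[4];
  for (size_t K = 0; K != 4; ++K)
    Tmp[K] = In[K] * 2;
  return Tmp[I] + Tmp[J];
}

// After: the same values kept in one vector-like value. Each indexed store
// becomes an insertelement and each indexed load an extractelement, which the
// backend can keep entirely in registers (modelled here with a plain struct).
struct Vec4 { int Lane[4]; };
static int sumAfter(const int (&In)[4], size_t I, size_t J) {
  Vec4 V = {};
  for (size_t K = 0; K != 4; ++K)
    V.Lane[K] = In[K] * 2;      // insertelement
  return V.Lane[I] + V.Lane[J]; // extractelement
}

int main() {
  const int In[4] = {1, 2, 3, 4};
  assert(sumBefore(In, 0, 3) == sumAfter(In, 0, 3)); // both yield 2 + 8 == 10
}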
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
new file mode 100644
index 000000000000..a5edc0c3b937
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -0,0 +1,230 @@
+//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "AMDGPUGenRegisterBank.inc"
+
+// This file will be TableGen'ed at some point.
+#include "AMDGPUGenRegisterBankInfo.def"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
+ : AMDGPUGenRegisterBankInfo(),
+ TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
+
+ // HACK: Until this is fully tablegen'd
+ static bool AlreadyInit = false;
+ if (AlreadyInit)
+ return;
+
+ AlreadyInit = true;
+
+ const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
+ (void)RBSGPR;
+ assert(&RBSGPR == &AMDGPU::SGPRRegBank);
+
+ const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
+ (void)RBVGPR;
+ assert(&RBVGPR == &AMDGPU::VGPRRegBank);
+
+}
+
+unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A,
+ const RegisterBank &B,
+ unsigned Size) const {
+ return RegisterBankInfo::copyCost(A, B, Size);
+}
+
+const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
+ const TargetRegisterClass &RC) const {
+
+ if (TRI->isSGPRClass(&RC))
+ return getRegBank(AMDGPU::SGPRRegBankID);
+
+ return getRegBank(AMDGPU::VGPRRegBankID);
+}
+
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::getInstrAlternativeMappings(
+ const MachineInstr &MI) const {
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+
+ InstructionMappings AltMappings;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_LOAD: {
+ // FIXME: Should we be hard coding the size for these mappings?
+ InstructionMapping SSMapping(1, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.emplace_back(std::move(SSMapping));
+
+ InstructionMapping VVMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.emplace_back(std::move(VVMapping));
+
+ // FIXME: Should this be the pointer-size (64-bits) or the size of the
+    // register that will hold the buffer resource (128-bits)?
+ InstructionMapping VSMapping(3, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.emplace_back(std::move(VSMapping));
+
+ return AltMappings;
+
+ }
+ default:
+ break;
+ }
+ return RegisterBankInfo::getInstrAlternativeMappings(MI);
+}
+
+void AMDGPURegisterBankInfo::applyMappingImpl(
+ const OperandsMapper &OpdMapper) const {
+ return applyDefaultMapping(OpdMapper);
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return AMDGPU::isUniformMMO(MMO);
+}
+
+RegisterBankInfo::InstructionMapping
+AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ RegisterBankInfo::InstructionMapping Mapping =
+ InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+ SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ const ValueMapping *ValMapping;
+ const ValueMapping *PtrMapping;
+
+ if (isInstrUniform(MI)) {
+ // We have a uniform instruction so we want to use an SMRD load
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
+ } else {
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ // FIXME: What would happen if we used SGPRRegBankID here?
+ PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
+ }
+
+ OpdsMapping[0] = ValMapping;
+ OpdsMapping[1] = PtrMapping;
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+
+ // FIXME: Do we want to add a mapping for FLAT load, or should we just
+ // handle that during instruction selection?
+}
+
+RegisterBankInfo::InstructionMapping
+AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+
+ if (Mapping.isValid())
+ return Mapping;
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+ SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::G_CONSTANT: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+ }
+ case AMDGPU::G_GEP: {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+
+ unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
+ OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ }
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+ }
+ case AMDGPU::G_STORE: {
+ assert(MI.getOperand(0).isReg());
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ // FIXME: We need to specify a different reg bank once scalar stores
+ // are supported.
+ const ValueMapping *ValMapping =
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ // FIXME: Depending on the type of store, the pointer could be in
+ // the SGPR Reg bank.
+ // FIXME: Pointer size should be based on the address space.
+ const ValueMapping *PtrMapping =
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+
+ OpdsMapping[0] = ValMapping;
+ OpdsMapping[1] = PtrMapping;
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+ }
+
+ case AMDGPU::G_LOAD:
+ return getInstrMappingForLoad(MI);
+ }
+
+ unsigned BankID = AMDGPU::SGPRRegBankID;
+
+ Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+ unsigned Size = 0;
+ for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
+    // If the operand is not a register, default to the size of the previous
+    // operand.
+    // FIXME: Can't we pull the types from the MachineInstr rather than the
+    // operands?
+ if (MI.getOperand(Idx).isReg())
+ Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
+ OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size));
+ }
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+
+ return Mapping;
+}
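
getInstrMappingForLoad() bases the bank choice on isInstrUniform(): a load with a single, provably uniform memory operand is mapped onto SGPRs so it can later select to a scalar (SMRD) load, and everything else is mapped onto VGPRs. A condensed sketch of that decision with illustrative types (not the RegisterBankInfo API itself):

#include <cassert>

// Illustrative stand-ins for AMDGPU::SGPRRegBankID / AMDGPU::VGPRRegBankID.
enum class Bank { SGPR, VGPR };

struct LoadBankChoice {
  Bank Value; // bank for the loaded value
  Bank Ptr;   // bank for the pointer operand
};

// Condensed version of getInstrMappingForLoad(): a load with exactly one,
// provably uniform memory operand (isInstrUniform()) is mapped onto scalar
// registers so it can select to an SMRD load; everything else goes to VGPRs.
static LoadBankChoice chooseLoadBanks(bool HasOneMemOperand, bool MMOIsUniform) {
  bool Uniform = HasOneMemOperand && MMOIsUniform;
  if (Uniform)
    return {Bank::SGPR, Bank::SGPR};
  return {Bank::VGPR, Bank::VGPR};
}

int main() {
  assert(chooseLoadBanks(true, true).Value == Bank::SGPR);
  assert(chooseLoadBanks(true, false).Ptr == Bank::VGPR);
  assert(chooseLoadBanks(false, true).Value == Bank::VGPR);
}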
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
new file mode 100644
index 000000000000..f13bde87ef2d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -0,0 +1,65 @@
+//===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+namespace llvm {
+
+class SIRegisterInfo;
+class TargetRegisterInfo;
+
+namespace AMDGPU {
+enum {
+ SGPRRegBankID = 0,
+ VGPRRegBankID = 1,
+ NumRegisterBanks
+};
+} // End AMDGPU namespace.
+
+/// This class provides the information for the target register banks.
+class AMDGPUGenRegisterBankInfo : public RegisterBankInfo {
+
+protected:
+
+#define GET_TARGET_REGBANK_CLASS
+#include "AMDGPUGenRegisterBank.inc"
+
+};
+class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
+ const SIRegisterInfo *TRI;
+
+ /// See RegisterBankInfo::applyMapping.
+ void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+
+ RegisterBankInfo::InstructionMapping
+ getInstrMappingForLoad(const MachineInstr &MI) const;
+
+public:
+ AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
+ unsigned Size) const override;
+
+ const RegisterBank &
+ getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+
+ InstructionMappings
+ getInstrAlternativeMappings(const MachineInstr &MI) const override;
+
+ InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+};
+} // End llvm namespace.
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
new file mode 100644
index 000000000000..f4428e56035f
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -0,0 +1,16 @@
+//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def SGPRRegBank : RegisterBank<"SGPR",
+ [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
+>;
+
+def VGPRRegBank : RegisterBank<"VGPR",
+ [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
+>;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index ef51aad95dce..22b1663821d9 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -16,10 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERINFO_H
-#include "llvm/Target/TargetRegisterInfo.h"
-
#define GET_REGINFO_HEADER
-#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h b/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
deleted file mode 100644
index ecd2ac72bf1b..000000000000
--- a/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
+++ /dev/null
@@ -1,193 +0,0 @@
-//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Enums and structure types used by runtime metadata.
-///
-/// Runtime requests certain information (metadata) about kernels to be able
-/// to execute the kernels and answer the queries about the kernels.
-/// The metadata is represented as a note element in the .note ELF section of a
-/// binary (code object). The desc field of the note element is a YAML string
-/// consisting of key-value pairs. Each key is a string. Each value can be
-/// an integer, a string, or an YAML sequence. There are 3 levels of YAML maps.
-/// At the beginning of the YAML string is the module level YAML map. A
-/// kernel-level YAML map is in the amd.Kernels sequence. A
-/// kernel-argument-level map is in the amd.Args sequence.
-///
-/// The format should be kept backward compatible. New enum values and bit
-/// fields should be appended at the end. It is suggested to bump up the
-/// revision number whenever the format changes and document the change
-/// in the revision in this header.
-///
-//
-//===----------------------------------------------------------------------===//
-//
-#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-
-#include <cstdint>
-#include <vector>
-#include <string>
-
-namespace AMDGPU {
-
-namespace RuntimeMD {
-
- // Version and revision of runtime metadata
- const unsigned char MDVersion = 2;
- const unsigned char MDRevision = 0;
-
- // Name of keys for runtime metadata.
- namespace KeyName {
- const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version
- const char Language[] = "amd.Language"; // Language
- const char LanguageVersion[] = "amd.LanguageVersion"; // Language version
- const char Kernels[] = "amd.Kernels"; // Kernels
- const char KernelName[] = "amd.KernelName"; // Kernel name
- const char Args[] = "amd.Args"; // Kernel arguments
- const char ArgSize[] = "amd.ArgSize"; // Kernel arg size
- const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment
- const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel type name
- const char ArgName[] = "amd.ArgName"; // Kernel name
- const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind
- const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type
- const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier
- const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier
- const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified
- const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified
- const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified
- const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified
- const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size
- const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint
- const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint
- const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue
- const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups
- const char PrintfInfo[] = "amd.PrintfInfo"; // Prinf function call information
- const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier
- const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type
- }
-
- namespace KernelArg {
- enum Kind : uint8_t {
- ByValue = 0,
- GlobalBuffer = 1,
- DynamicSharedPointer = 2,
- Sampler = 3,
- Image = 4,
- Pipe = 5,
- Queue = 6,
- HiddenGlobalOffsetX = 7,
- HiddenGlobalOffsetY = 8,
- HiddenGlobalOffsetZ = 9,
- HiddenNone = 10,
- HiddenPrintfBuffer = 11,
- HiddenDefaultQueue = 12,
- HiddenCompletionAction = 13,
- };
-
- enum ValueType : uint16_t {
- Struct = 0,
- I8 = 1,
- U8 = 2,
- I16 = 3,
- U16 = 4,
- F16 = 5,
- I32 = 6,
- U32 = 7,
- F32 = 8,
- I64 = 9,
- U64 = 10,
- F64 = 11,
- };
-
- // Avoid using 'None' since it conflicts with a macro in X11 header file.
- enum AccessQualifer : uint8_t {
- AccNone = 0,
- ReadOnly = 1,
- WriteOnly = 2,
- ReadWrite = 3,
- };
-
- enum AddressSpaceQualifer : uint8_t {
- Private = 0,
- Global = 1,
- Constant = 2,
- Local = 3,
- Generic = 4,
- Region = 5,
- };
- } // namespace KernelArg
-
- // Invalid values are used to indicate an optional key should not be emitted.
- const uint8_t INVALID_ADDR_QUAL = 0xff;
- const uint8_t INVALID_ACC_QUAL = 0xff;
- const uint32_t INVALID_KERNEL_INDEX = ~0U;
-
- namespace KernelArg {
- // In-memory representation of kernel argument information.
- struct Metadata {
- uint32_t Size;
- uint32_t Align;
- uint32_t PointeeAlign;
- uint8_t Kind;
- uint16_t ValueType;
- std::string TypeName;
- std::string Name;
- uint8_t AddrQual;
- uint8_t AccQual;
- uint8_t IsVolatile;
- uint8_t IsConst;
- uint8_t IsRestrict;
- uint8_t IsPipe;
- Metadata() : Size(0), Align(0), PointeeAlign(0), Kind(0), ValueType(0),
- AddrQual(INVALID_ADDR_QUAL), AccQual(INVALID_ACC_QUAL), IsVolatile(0),
- IsConst(0), IsRestrict(0), IsPipe(0) {}
- };
- }
-
- namespace Kernel {
- // In-memory representation of kernel information.
- struct Metadata {
- std::string Name;
- std::string Language;
- std::vector<uint8_t> LanguageVersion;
- std::vector<uint32_t> ReqdWorkGroupSize;
- std::vector<uint32_t> WorkGroupSizeHint;
- std::string VecTypeHint;
- uint32_t KernelIndex;
- uint8_t NoPartialWorkGroups;
- std::vector<KernelArg::Metadata> Args;
- Metadata() : KernelIndex(INVALID_KERNEL_INDEX), NoPartialWorkGroups(0) {}
- };
- }
-
- namespace Program {
- // In-memory representation of program information.
- struct Metadata {
- std::vector<uint8_t> MDVersionSeq;
- std::vector<std::string> PrintfInfo;
- std::vector<Kernel::Metadata> Kernels;
-
- explicit Metadata(){}
-
- // Construct from an YAML string.
- explicit Metadata(const std::string &YAML);
-
- // Convert to YAML string.
- std::string toYAML();
-
- // Convert from YAML string.
- static Metadata fromYAML(const std::string &S);
- };
- }
-} // namespace RuntimeMD
-} // namespace AMDGPU
-
-#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index c35a67de1d7f..972c28579f7a 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,8 +13,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Target/TargetFrameLowering.h"
#include <algorithm>
@@ -22,7 +24,6 @@ using namespace llvm;
#define DEBUG_TYPE "amdgpu-subtarget"
-#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
@@ -41,9 +42,10 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
- SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
+ SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-for-global,+unaligned-buffer-access,";
+ FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -59,9 +61,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP16Denormals = false;
+ FP64FP16Denormals = false;
FP32Denormals = false;
- FP64Denormals = false;
}
// Set defaults if needed.
@@ -85,15 +86,17 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FastFMAF32(false),
HalfRate64Ops(false),
- FP16Denormals(false),
FP32Denormals(false),
- FP64Denormals(false),
+ FP64FP16Denormals(false),
FPExceptions(false),
+ DX10Clamp(false),
FlatForGlobal(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
+ HasApertureRegs(false),
EnableXNACK(false),
+ TrapHandler(false),
DebuggerInsertNops(false),
DebuggerReserveRegs(false),
DebuggerEmitPrologue(false),
@@ -110,13 +113,17 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
GCN1Encoding(false),
GCN3Encoding(false),
CIInsts(false),
+ GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
+ HasVOP3PInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),
HasInv2PiInlineImm(false),
+ HasSDWA(false),
+ HasDPP(false),
FlatAddressSpace(false),
R600ALUInst(false),
@@ -128,65 +135,30 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FeatureDisable(false),
InstrItins(getInstrItineraryForCPU(GPU)) {
+ AS = AMDGPU::getAMDGPUAS(TT);
initializeSubtargetDependencies(TT, GPU, FS);
}
-// FIXME: These limits are for SI. Did they change with the larger maximum LDS
-// size?
-unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
- switch (NWaves) {
- case 10:
- return 1638;
- case 9:
- return 1820;
- case 8:
- return 2048;
- case 7:
- return 2340;
- case 6:
- return 2730;
- case 5:
- return 3276;
- case 4:
- return 4096;
- case 3:
- return 5461;
- case 2:
- return 8192;
- default:
+unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
+ const Function &F) const {
+ if (NWaves == 1)
return getLocalMemorySize();
- }
+ unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
+ unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ unsigned MaxWaves = getMaxWavesPerEU();
+ return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
-unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
- if (Bytes <= 1638)
- return 10;
-
- if (Bytes <= 1820)
- return 9;
-
- if (Bytes <= 2048)
- return 8;
-
- if (Bytes <= 2340)
- return 7;
-
- if (Bytes <= 2730)
- return 6;
-
- if (Bytes <= 3276)
- return 5;
-
- if (Bytes <= 4096)
- return 4;
-
- if (Bytes <= 5461)
- return 3;
-
- if (Bytes <= 8192)
- return 2;
-
- return 1;
+unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
+ const Function &F) const {
+ unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
+ unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ unsigned MaxWaves = getMaxWavesPerEU();
+ unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
+ unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
+ NumWaves = std::min(NumWaves, MaxWaves);
+ NumWaves = std::max(NumWaves, 1u);
+ return NumWaves;
}
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
@@ -224,7 +196,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
const Function &F) const {
// Default minimum/maximum number of waves per execution unit.
- std::pair<unsigned, unsigned> Default(1, 0);
+ std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
// Default/requested minimum/maximum flat work group sizes.
std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
@@ -269,6 +241,65 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
return Requested;
}
+bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
+ Function *Kernel = I->getParent()->getParent();
+ unsigned MinSize = 0;
+ unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
+ bool IdQuery = false;
+
+ // If reqd_work_group_size is present it narrows value down.
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ const Function *F = CI->getCalledFunction();
+ if (F) {
+ unsigned Dim = UINT_MAX;
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::r600_read_tidig_x:
+ IdQuery = true;
+ case Intrinsic::r600_read_local_size_x:
+ Dim = 0;
+ break;
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::r600_read_tidig_y:
+ IdQuery = true;
+ case Intrinsic::r600_read_local_size_y:
+ Dim = 1;
+ break;
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::r600_read_tidig_z:
+ IdQuery = true;
+ case Intrinsic::r600_read_local_size_z:
+ Dim = 2;
+ break;
+ default:
+ break;
+ }
+ if (Dim <= 3) {
+ if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
+ if (Node->getNumOperands() == 3)
+ MinSize = MaxSize = mdconst::extract<ConstantInt>(
+ Node->getOperand(Dim))->getZExtValue();
+ }
+ }
+ }
+
+ if (!MaxSize)
+ return false;
+
+ // Range metadata is [Lo, Hi). For ID query we need to pass max size
+ // as Hi. For size query we need to pass Hi + 1.
+ if (IdQuery)
+ MinSize = 0;
+ else
+ ++MaxSize;
+
+ MDBuilder MDB(I->getContext());
+ MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
+ APInt(32, MaxSize));
+ I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ return true;
+}
+
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
AMDGPUSubtarget(TT, GPU, FS, TM),
@@ -305,7 +336,7 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
}
unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
- unsigned ExplicitArgBytes) const {
+ unsigned ExplicitArgBytes) const {
unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
if (ImplicitBytes == 0)
return ExplicitArgBytes;
@@ -359,12 +390,100 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
return 1;
}
-unsigned SISubtarget::getMaxNumSGPRs() const {
+unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI.hasFlatScratchInit()) {
+ if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
+ if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
+ return 4; // FLAT_SCRATCH, VCC (in that order).
+ }
+
+ if (isXNACKEnabled())
+ return 4; // XNACK, VCC (in that order).
+ return 2; // VCC.
+}
+
+unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of SGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
+ unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
+
+ // Check if maximum number of SGPRs was explicitly requested using
+ // "amdgpu-num-sgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-sgpr")) {
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-sgpr", MaxNumSGPRs);
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && (Requested <= getReservedNumSGPRs(MF)))
+ Requested = 0;
+
+ // If more SGPRs are required to support the input user/system SGPRs,
+ // increase to accommodate them.
+ //
+ // FIXME: This really ends up using the requested number of SGPRs + number
+ // of reserved special registers in total. Theoretically you could re-use
+ // the last input registers for these special registers, but this would
+ // require a lot of complexity to deal with the weird aliasing.
+ unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
+ if (Requested && Requested < InputNumSGPRs)
+ Requested = InputNumSGPRs;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
+ Requested = 0;
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumSGPRs = Requested;
+ }
+
if (hasSGPRInitBug())
- return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
+ MaxAddressableNumSGPRs);
+}
- if (getGeneration() >= VOLCANIC_ISLANDS)
- return 102;
+unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of VGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
+
+ // Check if maximum number of VGPRs was explicitly requested using
+ // "amdgpu-num-vgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-vgpr")) {
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-vgpr", MaxNumVGPRs);
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && Requested <= getReservedNumVGPRs(MF))
+ Requested = 0;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
+ Requested = 0;
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumVGPRs = Requested;
+ }
- return 104;
+ return MaxNumVGPRs - getReservedNumVGPRs(MF);
}
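
The rewritten LDS helpers replace the old hard-coded SI table with one formula in each direction: the LDS budget for NWaves is LDSSize * MaxWavesPerEU / WorkGroupsPerCU / NWaves, and the occupancy for a given LDS usage is that same limit divided by the usage, clamped to [1, MaxWavesPerEU]. A standalone worked example with assumed subtarget numbers (64 KiB of LDS, 10 waves per EU, 4 work groups per CU; all illustrative, not taken from the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Assumed, illustrative subtarget values; the real ones come from
// getLocalMemorySize(), getMaxWavesPerEU() and getMaxWorkGroupsPerCU().
static const uint32_t LDSSize = 65536;     // bytes of LDS (assumed)
static const uint32_t MaxWavesPerEU = 10;  // assumed
static const uint32_t WorkGroupsPerCU = 4; // assumed for the chosen WG size

// Mirror of getMaxLocalMemSizeWithWaveCount(): the LDS budget that still
// allows NWaves waves per EU.
static uint32_t maxLDSWithWaveCount(uint32_t NWaves) {
  if (NWaves == 1)
    return LDSSize;
  return LDSSize * MaxWavesPerEU / WorkGroupsPerCU / NWaves;
}

// Mirror of getOccupancyWithLocalMemSize(): waves per EU achievable when a
// work-group uses Bytes of LDS, clamped to [1, MaxWavesPerEU].
static uint32_t occupancyWithLDS(uint32_t Bytes) {
  uint32_t Limit = LDSSize * MaxWavesPerEU / WorkGroupsPerCU;
  uint32_t NumWaves = Limit / (Bytes ? Bytes : 1u);
  return std::max(1u, std::min(NumWaves, MaxWavesPerEU));
}

int main() {
  // With the assumed numbers, 16384 bytes of LDS per work-group still allows
  // the full 10 waves, while 65536 bytes drops the estimate to 2 waves.
  assert(occupancyWithLDS(16384) == 10);
  assert(occupancyWithLDS(65536) == 2);
  // Asking for 8 waves leaves a budget of 20480 bytes.
  assert(maxLDSWithWaveCount(8) == 20480);
}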
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0e3cb7dc1f87..36bc2498781f 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "SIISelLowering.h"
#include "SIFrameLowering.h"
+#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
@@ -51,6 +52,7 @@ public:
SOUTHERN_ISLANDS,
SEA_ISLANDS,
VOLCANIC_ISLANDS,
+ GFX9,
};
enum {
@@ -64,6 +66,28 @@ public:
ISAVersion8_0_3,
ISAVersion8_0_4,
ISAVersion8_1_0,
+ ISAVersion9_0_0,
+ ISAVersion9_0_1
+ };
+
+ enum TrapHandlerAbi {
+ TrapHandlerAbiNone = 0,
+ TrapHandlerAbiHsa = 1
+ };
+
+ enum TrapID {
+ TrapIDHardwareReserved = 0,
+ TrapIDHSADebugTrap = 1,
+ TrapIDLLVMTrap = 2,
+ TrapIDLLVMDebugTrap = 3,
+ TrapIDDebugBreakpoint = 7,
+ TrapIDDebugReserved8 = 8,
+ TrapIDDebugReservedFE = 0xfe,
+ TrapIDDebugReservedFF = 0xff
+ };
+
+ enum TrapRegValues {
+ LLVMTrapHandlerRegValue = 1
};
protected:
@@ -81,14 +105,16 @@ protected:
bool HalfRate64Ops;
  // Dynamically set bits that enable features.
- bool FP16Denormals;
bool FP32Denormals;
- bool FP64Denormals;
+ bool FP64FP16Denormals;
bool FPExceptions;
+ bool DX10Clamp;
bool FlatForGlobal;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
+ bool HasApertureRegs;
bool EnableXNACK;
+ bool TrapHandler;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
bool DebuggerEmitPrologue;
@@ -107,13 +133,17 @@ protected:
bool GCN1Encoding;
bool GCN3Encoding;
bool CIInsts;
+ bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool HasVOP3PInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasInv2PiInlineImm;
+ bool HasSDWA;
+ bool HasDPP;
bool FlatAddressSpace;
bool R600ALUInst;
bool CaymanISA;
@@ -127,6 +157,7 @@ protected:
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
+ AMDGPUAS AS;
public:
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
@@ -184,10 +215,18 @@ public:
return MaxPrivateElementSize;
}
+ AMDGPUAS getAMDGPUAS() const {
+ return AS;
+ }
+
bool has16BitInsts() const {
return Has16BitInsts;
}
+ bool hasVOP3PInsts() const {
+ return HasVOP3PInsts;
+ }
+
bool hasHWFP64() const {
return FP64;
}
@@ -243,6 +282,10 @@ public:
return (getGeneration() >= EVERGREEN);
}
+ bool hasMed3_16() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
@@ -255,6 +298,10 @@ public:
return CaymanISA;
}
+ TrapHandlerAbi getTrapHandlerAbi() const {
+ return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -267,20 +314,22 @@ public:
return DumpCode;
}
- bool enableIEEEBit(const MachineFunction &MF) const {
- return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
- }
-
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
- unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
+ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+ const Function &) const;
/// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
/// the given LDS memory size is the only constraint.
- unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
+ unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
+
+ unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction());
+ }
bool hasFP16Denormals() const {
- return FP16Denormals;
+ return FP64FP16Denormals;
}
bool hasFP32Denormals() const {
@@ -288,13 +337,21 @@ public:
}
bool hasFP64Denormals() const {
- return FP64Denormals;
+ return FP64FP16Denormals;
}
bool hasFPExceptions() const {
return FPExceptions;
}
+ bool enableDX10Clamp() const {
+ return DX10Clamp;
+ }
+
+ bool enableIEEEBit(const MachineFunction &MF) const {
+ return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
+ }
+
bool useFlatForGlobal() const {
return FlatForGlobal;
}
@@ -307,10 +364,22 @@ public:
return UnalignedScratchAccess;
}
+ bool hasApertureRegs() const {
+ return HasApertureRegs;
+ }
+
+ bool isTrapHandlerEnabled() const {
+ return TrapHandler;
+ }
+
bool isXNACKEnabled() const {
return EnableXNACK;
}
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
bool isMesaKernel(const MachineFunction &MF) const {
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
}
@@ -324,6 +393,10 @@ public:
return isAmdHsaOS() || isMesaKernel(MF);
}
+ bool hasFminFmaxLegacy() const {
+ return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ }
+
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
@@ -355,72 +428,71 @@ public:
return true;
}
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return 4;
+ return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
}
/// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return getMaxWavesPerEU() * getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return getWavesPerWorkGroup(FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
unsigned getMinWavesPerEU() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- // FIXME: Need to take scratch memory into account.
- return 10;
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
- getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum flat work group size supported by the subtarget.
unsigned getMinFlatWorkGroupSize() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
}
/// \returns Maximum flat work group size supported by the subtarget.
unsigned getMaxFlatWorkGroupSize() const {
- return 2048;
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
}
- /// \returns Number of waves per work group given the flat work group size.
+ /// \returns Number of waves per work group supported by the subtarget and
+ /// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
+ FlatWorkGroupSize);
}
- void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
- bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
-
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
/// for function \p F, or minimum/maximum flat work group sizes explicitly
/// requested using "amdgpu-flat-work-group-size" attribute attached to
@@ -440,6 +512,9 @@ public:
/// compatible with minimum/maximum number of waves limited by flat work group
/// size, register usage, and/or lds usage.
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+
+  /// Creates value range metadata on a workitemid.* intrinsic call or load.
+ bool makeLIDRangeMetadata(Instruction *I) const;
};
class R600Subtarget final : public AMDGPUSubtarget {
@@ -482,13 +557,6 @@ public:
};
class SISubtarget final : public AMDGPUSubtarget {
-public:
- enum {
- // The closed Vulkan driver sets 96, which limits the wave count to 8 but
- // doesn't spill SGPRs as much as when 80 is set.
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
- };
-
private:
SIInstrInfo InstrInfo;
SIFrameLowering FrameLowering;
@@ -516,6 +584,21 @@ public:
return GISel->getCallLowering();
}
+ const InstructionSelector *getInstructionSelector() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getInstructionSelector();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getLegalizerInfo();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getRegBankInfo();
+ }
+
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -524,6 +607,11 @@ public:
this->GISel.reset(&GISel);
}
+ // XXX - Why is this here if it isn't in the default pass set?
+ bool enableEarlyIfConversion() const override {
+ return true;
+ }
+
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
@@ -533,10 +621,6 @@ public:
return 16;
}
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
-
bool hasSMemRealTime() const {
return HasSMemRealTime;
}
@@ -549,6 +633,10 @@ public:
return HasVGPRIndexMode;
}
+ bool useVGPRIndexMode(bool UserEnable) const {
+ return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
+ }
+
bool hasScalarCompareEq64() const {
return getGeneration() >= VOLCANIC_ISLANDS;
}
@@ -561,6 +649,14 @@ public:
return HasInv2PiInlineImm;
}
+ bool hasSDWA() const {
+ return HasSDWA;
+ }
+
+ bool hasDPP() const {
+ return HasDPP;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -594,6 +690,14 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ bool hasSMovFedHazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasReadM0Hazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
@@ -605,10 +709,107 @@ public:
/// \returns True if waitcnt instruction is needed before barrier instruction,
/// false otherwise.
bool needWaitcntBeforeBarrier() const {
- return true;
+ return getGeneration() < GFX9;
+ }
+
+ /// \returns true if the flat_scratch register should be initialized with the
+ /// pointer to the wave's scratch memory rather than a size and offset.
+ bool flatScratchIsPointer() const {
+ return getGeneration() >= GFX9;
+ }
+
+ /// \returns SGPR allocation granularity supported by the subtarget.
+ unsigned getSGPRAllocGranule() const {
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
+ }
+
+ /// \returns SGPR encoding granularity supported by the subtarget.
+ unsigned getSGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
}
- unsigned getMaxNumSGPRs() const;
+ /// \returns Total number of SGPRs supported by the subtarget.
+ unsigned getTotalNumSGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
+ }
+
+ /// \returns Addressable number of SGPRs supported by the subtarget.
+ unsigned getAddressableNumSGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
+ }
+
+ /// \returns Minimum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Maximum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
+ Addressable);
+ }
+
+ /// \returns Reserved number of SGPRs for given function \p MF.
+ unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns Maximum number of SGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of SGPRs explicitly
+ /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns VGPR allocation granularity supported by the subtarget.
+ unsigned getVGPRAllocGranule() const {
+    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
+ }
+
+ /// \returns VGPR encoding granularity supported by the subtarget.
+ unsigned getVGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
+ }
+
+ /// \returns Total number of VGPRs supported by the subtarget.
+ unsigned getTotalNumVGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
+ }
+
+ /// \returns Addressable number of VGPRs supported by the subtarget.
+ unsigned getAddressableNumVGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
+ }
+
+ /// \returns Minimum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Maximum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Reserved number of VGPRs for given function \p MF.
+ unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
+ return debuggerReserveRegs() ? 4 : 0;
+ }
+
+ /// \returns Maximum number of VGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of VGPRs explicitly
+ /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d8a0c716279c..0202220b8011 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -15,24 +15,29 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
+#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCallLowering.h"
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPULegalizerInfo.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "AMDGPURegisterBankInfo.h"
+#endif
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
+#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
#include "SIMachineScheduler.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
@@ -58,6 +63,11 @@ static cl::opt<bool> EnableSROA(
cl::ReallyHidden,
cl::init(true));
+static cl::opt<bool>
+EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
+ cl::desc("Run early if-conversion"),
+ cl::init(false));
+
static cl::opt<bool> EnableR600IfConvert(
"r600-if-convert",
cl::desc("Use if conversion pass"),
@@ -78,6 +88,36 @@ static cl::opt<bool> ScalarizeGlobal(
cl::init(false),
cl::Hidden);
+// Option to run internalize pass.
+static cl::opt<bool> InternalizeSymbols(
+ "amdgpu-internalize-symbols",
+ cl::desc("Enable elimination of non-kernel functions and unused globals"),
+ cl::init(false),
+ cl::Hidden);
+
+// Option to inline all early.
+static cl::opt<bool> EarlyInlineAll(
+ "amdgpu-early-inline-all",
+ cl::desc("Inline all functions early"),
+ cl::init(false),
+ cl::Hidden);
+
+static cl::opt<bool> EnableSDWAPeephole(
+ "amdgpu-sdwa-peephole",
+ cl::desc("Enable SDWA peepholer"),
+ cl::init(true));
+
+// Enable address space based alias analysis
+static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
+ cl::desc("Enable AMDGPU Alias Analysis"),
+ cl::init(true));
+
+// Option to enable new waitcnt insertion pass.
+static cl::opt<bool> EnableSIInsertWaitcntsPass(
+ "enable-si-insert-waitcnts",
+ cl::desc("Use new waitcnt insertion pass"),
+ cl::init(false));
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -86,22 +126,28 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
PassRegistry *PR = PassRegistry::getPassRegistry();
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
+ initializeSIFixVGPRCopiesPass(*PR);
initializeSIFoldOperandsPass(*PR);
+ initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIFixControlFlowLiveIntervalsPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
+ initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitsPass(*PR);
+ initializeSIInsertWaitcntsPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIDebuggerInsertNopsPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
+ initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
+ initializeAMDGPUAAWrapperPassPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -119,13 +165,26 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
- new ScheduleDAGMILive(C,
- llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
+ new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
+static ScheduleDAGInstrs *
+createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
+ auto DAG = new GCNIterativeScheduler(C,
+ GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
+}
+
+static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
+ return new GCNIterativeScheduler(C,
+ GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
+}
+
static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);
@@ -139,6 +198,16 @@ GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
"Run GCN scheduler to maximize occupancy",
createGCNMaxOccupancyMachineScheduler);
+static MachineSchedRegistry
+IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
+ "Run GCN scheduler to maximize occupancy (experimental)",
+ createIterativeGCNMaxOccupancyMachineScheduler);
+
+static MachineSchedRegistry
+GCNMinRegSchedRegistry("gcn-minreg",
+ "Run GCN iterative scheduler for minimal register usage (experimental)",
+ createMinRegScheduler);
+
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
@@ -148,9 +217,14 @@ static StringRef computeDataLayout(const Triple &TT) {
// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat.
- return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+ if (TT.getEnvironmentName() == "amdgiz" ||
+ TT.getEnvironmentName() == "amdgizcl")
+ return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
+ return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+ "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}
LLVM_READNONE
@@ -180,6 +254,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
TLOF(createTLOF(getTargetTriple())) {
+ AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
@@ -199,8 +274,65 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
FSAttr.getValueAsString();
}
-void AMDGPUTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
- PM.add(createAMDGPUUnifyMetadataPass());
+static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
+ return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ });
+}
+
+void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ Builder.DivergentTarget = true;
+
+ bool Internalize = InternalizeSymbols &&
+ (getOptLevel() > CodeGenOpt::None) &&
+ (getTargetTriple().getArch() == Triple::amdgcn);
+ bool EarlyInline = EarlyInlineAll &&
+ (getOptLevel() > CodeGenOpt::None);
+ bool AMDGPUAA = EnableAMDGPUAliasAnalysis && getOptLevel() > CodeGenOpt::None;
+
+ Builder.addExtension(
+ PassManagerBuilder::EP_ModuleOptimizerEarly,
+ [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+ if (AMDGPUAA) {
+ PM.add(createAMDGPUAAWrapperPass());
+ PM.add(createAMDGPUExternalAAWrapperPass());
+ }
+ PM.add(createAMDGPUUnifyMetadataPass());
+ if (Internalize) {
+ PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
+ if (const Function *F = dyn_cast<Function>(&GV)) {
+ if (F->isDeclaration())
+ return true;
+ switch (F->getCallingConv()) {
+ default:
+ return false;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ }
+ }
+ return !GV.use_empty();
+ }));
+ PM.add(createGlobalDCEPass());
+ }
+ if (EarlyInline)
+ PM.add(createAMDGPUAlwaysInlinePass(false));
+ });
+
+ Builder.addExtension(
+ PassManagerBuilder::EP_EarlyAsPossible,
+ [AMDGPUAA](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ if (AMDGPUAA) {
+ PM.add(createAMDGPUAAWrapperPass());
+ PM.add(createAMDGPUExternalAAWrapperPass());
+ }
+ });
}
//===----------------------------------------------------------------------===//
@@ -245,9 +377,21 @@ namespace {
struct SIGISelActualAccessor : public GISelAccessor {
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
const AMDGPUCallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
};
} // end anonymous namespace
@@ -281,6 +425,11 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
GISel->CallLoweringInfo.reset(
new AMDGPUCallLowering(*I->getTargetLowering()));
+ GISel->Legalizer.reset(new AMDGPULegalizerInfo());
+
+ GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
+ GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
+ *static_cast<AMDGPURegisterBankInfo*>(GISel->RegBankInfo.get())));
#endif
I->setGISelAccessor(*GISel);
@@ -356,9 +505,9 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override;
- void addIRPasses() override;
bool addPreISel() override;
void addMachineSSAOptimization() override;
+ bool addILPOpts() override;
bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
bool addIRTranslator() override;
@@ -406,11 +555,15 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
}
void AMDGPUPassConfig::addIRPasses() {
+ const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+
// There is no reason to run these.
disablePass(&StackMapLivenessID);
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAMDGPULowerIntrinsicsPass(&TM));
+
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
@@ -421,17 +574,33 @@ void AMDGPUPassConfig::addIRPasses() {
// without ever running any passes on the second.
addPass(createBarrierNoopPass());
+ if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
+ // TODO: May want to move later or split into an early and late one.
+
+ addPass(createAMDGPUCodeGenPreparePass(
+ static_cast<const GCNTargetMachine *>(&TM)));
+ }
+
// Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
addPass(createAMDGPUOpenCLImageTypeLoweringPass());
- const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
if (TM.getOptLevel() > CodeGenOpt::None) {
+ addPass(createInferAddressSpacesPass());
addPass(createAMDGPUPromoteAlloca(&TM));
if (EnableSROA)
addPass(createSROAPass());
addStraightLineScalarOptimizationPasses();
+
+ if (EnableAMDGPUAliasAnalysis) {
+ addPass(createAMDGPUAAWrapperPass());
+ addPass(createExternalAAWrapperPass([](Pass &P, Function &,
+ AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }));
+ }
}
TargetPassConfig::addIRPasses();
@@ -526,7 +695,12 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
- addPass(&AMDGPUAnnotateKernelFeaturesID);
+ const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+ addPass(createAMDGPUAnnotateKernelFeaturesPass(&TM));
+
+ // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
+ // regions formed by them.
+ addPass(&AMDGPUUnifyDivergentExitNodesID);
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
addPass(createSinkingPass());
addPass(createSITypeRewriter());
@@ -549,13 +723,19 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
+ addPass(createSIShrinkInstructionsPass());
+ if (EnableSDWAPeephole) {
+ addPass(&SIPeepholeSDWAID);
+ addPass(&DeadMachineInstructionElimID);
+ }
}
-void GCNPassConfig::addIRPasses() {
- // TODO: May want to move later or split into an early and late one.
- addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));
+bool GCNPassConfig::addILPOpts() {
+ if (EnableEarlyIfConversion)
+ addPass(&EarlyIfConverterID);
- AMDGPUPassConfig::addIRPasses();
+ TargetPassConfig::addILPOpts();
+ return false;
}
bool GCNPassConfig::addInstSelector() {
@@ -572,20 +752,23 @@ bool GCNPassConfig::addIRTranslator() {
}
bool GCNPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
return false;
}
bool GCNPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
return false;
}
bool GCNPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
return false;
}
+
#endif
void GCNPassConfig::addPreRegAlloc() {
- addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());
}
@@ -615,6 +798,7 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
}
void GCNPassConfig::addPostRegAlloc() {
+ addPass(&SIFixVGPRCopiesID);
addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
@@ -633,7 +817,10 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);
- addPass(createSIInsertWaitsPass());
+ if (EnableSIInsertWaitcntsPass)
+ addPass(createSIInsertWaitcntsPass());
+ else
+ addPass(createSIInsertWaitsPass());
addPass(createSIShrinkInstructionsPass());
addPass(&SIInsertSkipsPassID);
addPass(createSIDebuggerInsertNopsPass());
@@ -643,3 +830,4 @@ void GCNPassConfig::addPreEmitPass() {
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(this, PM);
}
+
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 9496773a073f..934bf7f31bab 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -35,6 +35,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUIntrinsicInfo IntrinsicInfo;
+ AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
@@ -57,7 +58,18 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
- void addEarlyAsPossiblePasses(PassManagerBase &PM) override;
+ AMDGPUAS getAMDGPUAS() const {
+ return AS;
+ }
+
+ void adjustPassManager(PassManagerBuilder &) override;
+ /// Get the integer value of a null pointer in the given address space.
+ uint64_t getNullPointerValue(unsigned AddrSpace) const {
+ if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
+ return -1;
+ return 0;
+ }
+
};
//===----------------------------------------------------------------------===//
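
The getNullPointerValue hook added above encodes the fact that address 0 is a valid object address in LDS (local) and region memory, so the null pointer there is represented as all-ones rather than zero. A quick standalone illustration of the returned values follows; the address-space numbers are made up for the example, the real ones come from AMDGPUAS.

#include <cstdint>
#include <cstdio>

// Illustrative address-space numbers; the real values come from AMDGPUAS.
enum AddrSpaceKind : unsigned { GLOBAL = 1, LOCAL = 3, REGION = 4 };

uint64_t nullPointerValue(unsigned AS) {
  // All-ones for LDS/region, zero everywhere else, as in the hook above.
  return (AS == LOCAL || AS == REGION) ? uint64_t(-1) : 0;
}

int main() {
  std::printf("global null: 0x%llx\n",
              (unsigned long long)nullPointerValue(GLOBAL)); // 0x0
  std::printf("local  null: 0x%llx\n",
              (unsigned long long)nullPointerValue(LOCAL));  // 0xffffffffffffffff
}
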
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index 1fddc88a705a..c96761c0b04e 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUTargetMachine.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
#include "llvm/MC/MCContext.h"
@@ -22,7 +23,8 @@ using namespace llvm;
MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) &&
+ auto AS = static_cast<const AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) &&
AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple()))
return TextSection;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index de327786dff6..ca6210f69298 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+#include "AMDGPU.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index e90487065992..01ac9968181a 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -29,6 +29,39 @@ using namespace llvm;
#define DEBUG_TYPE "AMDGPUtti"
+static cl::opt<unsigned> UnrollThresholdPrivate(
+ "amdgpu-unroll-threshold-private",
+ cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
+ cl::init(2500), cl::Hidden);
+
+static cl::opt<unsigned> UnrollThresholdLocal(
+ "amdgpu-unroll-threshold-local",
+ cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
+ cl::init(1000), cl::Hidden);
+
+static cl::opt<unsigned> UnrollThresholdIf(
+ "amdgpu-unroll-threshold-if",
+ cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
+ cl::init(150), cl::Hidden);
+
+static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
+ unsigned Depth = 0) {
+ const Instruction *I = dyn_cast<Instruction>(Cond);
+ if (!I)
+ return false;
+
+ for (const Value *V : I->operand_values()) {
+ if (!L->contains(I))
+ continue;
+ if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
+ if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
+ return SubLoop->contains(PHI); }))
+ return true;
+ } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
+ return true;
+ }
+ return false;
+}
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
@@ -38,29 +71,115 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
// TODO: Do we want runtime unrolling?
+  // Maximum alloca size that can fit in registers. Reserve 16 registers.
+ const unsigned MaxAlloca = (256 - 16) * 4;
+ unsigned ThresholdPrivate = UnrollThresholdPrivate;
+ unsigned ThresholdLocal = UnrollThresholdLocal;
+ unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
+ AMDGPUAS ASST = ST->getAMDGPUAS();
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
+ unsigned LocalGEPsSeen = 0;
+
+ if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
+ return SubLoop->contains(BB); }))
+ continue; // Block belongs to an inner loop.
+
for (const Instruction &I : *BB) {
+
+      // Unroll a loop which contains an "if" statement whose condition is
+      // defined by a PHI belonging to the loop. This may help to eliminate
+      // the if region and potentially even the PHI itself, saving on both
+      // divergence and registers used for the PHI.
+      // Add a small bonus for each such "if" statement.
+ if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
+ if (UP.Threshold < MaxBoost && Br->isConditional()) {
+ if (L->isLoopExiting(Br->getSuccessor(0)) ||
+ L->isLoopExiting(Br->getSuccessor(1)))
+ continue;
+ if (dependsOnLocalPhi(L, Br->getCondition())) {
+ UP.Threshold += UnrollThresholdIf;
+ DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
+ << " for loop:\n" << *L << " due to " << *Br << '\n');
+ if (UP.Threshold >= MaxBoost)
+ return;
+ }
+ }
+ continue;
+ }
+
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
- if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ if (!GEP)
+ continue;
+
+ unsigned AS = GEP->getAddressSpace();
+ unsigned Threshold = 0;
+ if (AS == ASST.PRIVATE_ADDRESS)
+ Threshold = ThresholdPrivate;
+ else if (AS == ASST.LOCAL_ADDRESS)
+ Threshold = ThresholdLocal;
+ else
+ continue;
+
+ if (UP.Threshold >= Threshold)
continue;
- const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca =
- dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
- if (Alloca) {
- // We want to do whatever we can to limit the number of alloca
- // instructions that make it through to the code generator. allocas
- // require us to use indirect addressing, which is slow and prone to
- // compiler bugs. If this loop does an address calculation on an
- // alloca ptr, then we want to use a higher than normal loop unroll
- // threshold. This will give SROA a better chance to eliminate these
- // allocas.
- //
- // Don't use the maximum allowed value here as it will make some
- // programs way too big.
- UP.Threshold = 800;
+ if (AS == ASST.PRIVATE_ADDRESS) {
+ const Value *Ptr = GEP->getPointerOperand();
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
+ if (!Alloca || !Alloca->isStaticAlloca())
+ continue;
+ Type *Ty = Alloca->getAllocatedType();
+ unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
+ if (AllocaSize > MaxAlloca)
+ continue;
+ } else if (AS == ASST.LOCAL_ADDRESS) {
+ LocalGEPsSeen++;
+      // Inhibit unroll for local memory if we have seen addressing not to
+      // a variable; most likely we will be unable to combine it.
+ // Do not unroll too deep inner loops for local memory to give a chance
+ // to unroll an outer loop for a more important reason.
+ if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
+ (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
+ !isa<Argument>(GEP->getPointerOperand())))
+ continue;
}
+
+ // Check if GEP depends on a value defined by this loop itself.
+ bool HasLoopDef = false;
+ for (const Value *Op : GEP->operands()) {
+ const Instruction *Inst = dyn_cast<Instruction>(Op);
+ if (!Inst || L->isLoopInvariant(Op))
+ continue;
+
+ if (any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
+ return SubLoop->contains(Inst); }))
+ continue;
+ HasLoopDef = true;
+ break;
+ }
+ if (!HasLoopDef)
+ continue;
+
+ // We want to do whatever we can to limit the number of alloca
+ // instructions that make it through to the code generator. allocas
+ // require us to use indirect addressing, which is slow and prone to
+ // compiler bugs. If this loop does an address calculation on an
+ // alloca ptr, then we want to use a higher than normal loop unroll
+ // threshold. This will give SROA a better chance to eliminate these
+ // allocas.
+ //
+ // We also want to have more unrolling for local memory to let ds
+ // instructions with different offsets combine.
+ //
+ // Don't use the maximum allowed value here as it will make some
+ // programs way too big.
+ UP.Threshold = Threshold;
+ DEBUG(dbgs() << "Set unroll threshold " << Threshold << " for loop:\n"
+ << *L << " due to " << *GEP << '\n');
+ if (UP.Threshold >= MaxBoost)
+ return;
}
}
}
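
For context, the private-memory case above targets loops that index a small stack array with a loop-dependent value: fully unrolling such a loop turns the dynamic indices into constants, giving SROA a chance to promote the array to registers instead of leaving slow indirect scratch accesses. A hypothetical source-level example of the pattern, not taken from the patch or its tests:

// Hypothetical kernel-like helper: 'lut' lives in private (scratch) memory
// and the second loop indexes it with a value defined by the loop, so every
// GEP depends on a loop-local value. With the raised threshold the loop can
// be fully unrolled and SROA can eliminate the alloca entirely.
float sumLut(const float *in) {
  float lut[16];
  for (int i = 0; i < 16; ++i)
    lut[i] = in[i] * 0.5f;

  float acc = 0.0f;
  for (int i = 0; i < 16; ++i)
    acc += lut[(i * 5) & 15];  // loop-dependent index into a private array
  return acc;
}
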
@@ -81,28 +200,56 @@ unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
}
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- switch (AddrSpace) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS:
- case AMDGPUAS::FLAT_ADDRESS:
+ AMDGPUAS AS = ST->getAMDGPUAS();
+ if (AddrSpace == AS.GLOBAL_ADDRESS ||
+ AddrSpace == AS.CONSTANT_ADDRESS ||
+ AddrSpace == AS.FLAT_ADDRESS)
return 128;
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS:
+ if (AddrSpace == AS.LOCAL_ADDRESS ||
+ AddrSpace == AS.REGION_ADDRESS)
return 64;
- case AMDGPUAS::PRIVATE_ADDRESS:
+ if (AddrSpace == AS.PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
- default:
- if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
- (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
- AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
- (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
- AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
- return 128;
- llvm_unreachable("unhandled address space");
+
+ if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
+ (AddrSpace == AS.PARAM_D_ADDRESS ||
+ AddrSpace == AS.PARAM_I_ADDRESS ||
+ (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
+ AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ return 128;
+ llvm_unreachable("unhandled address space");
+}
+
+bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ // We allow vectorization of flat stores, even though we may need to decompose
+ // them later if they may access private memory. We don't have enough context
+ // here, and legalization can handle it.
+ if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
+ return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
+ ChainSizeInBytes <= ST->getMaxPrivateElementSize();
}
+ return true;
+}
+
+bool AMDGPUTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // Disable unrolling if the loop is not vectorized.
+ if (VF == 1)
+ return 1;
+
// Semi-arbitrary large amount.
return 64;
}
@@ -228,16 +375,8 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
}
}
-static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
- const IntrinsicInst *I) {
+static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::not_intrinsic:
- // This means we have an intrinsic that isn't defined in
- // IntrinsicsAMDGPU.td
- break;
-
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::amdgcn_workitem_id_z:
@@ -249,6 +388,8 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
case Intrinsic::r600_read_tidig_x:
case Intrinsic::r600_read_tidig_y:
case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_image_atomic_swap:
case Intrinsic::amdgcn_image_atomic_add:
case Intrinsic::amdgcn_image_atomic_sub:
@@ -274,16 +415,10 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
case Intrinsic::amdgcn_buffer_atomic_xor:
case Intrinsic::amdgcn_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_ps_live:
+ case Intrinsic::amdgcn_ds_swizzle:
return true;
- }
-
- StringRef Name = I->getCalledFunction()->getName();
- switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) {
default:
return false;
- case AMDGPUIntrinsic::SI_fs_interp:
- case AMDGPUIntrinsic::SI_fs_constant:
- return true;
}
}
@@ -295,8 +430,8 @@ static bool isArgPassedInSGPR(const Argument *A) {
return true;
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
- if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) ||
- F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal))
+ if (F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
+ F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal))
return true;
// Everything else is in VGPRs.
@@ -318,7 +453,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
- return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+ return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each
@@ -327,10 +462,8 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
return true;
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
- const TargetMachine &TM = getTLI()->getTargetMachine();
- return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
- }
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
+ return isIntrinsicSourceOfDivergence(Intrinsic);
// Assume all function calls are a source of divergence.
if (isa<CallInst>(V) || isa<InvokeInst>(V))
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 0d83b2a585bf..71d6306bc1a5 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -32,6 +32,7 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
const AMDGPUSubtarget *ST;
const AMDGPUTargetLowering *TLI;
+ bool IsGraphicsShader;
const AMDGPUSubtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
@@ -62,7 +63,8 @@ public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(TM->getSubtargetImpl(F)),
- TLI(ST->getTargetLowering()) {}
+ TLI(ST->getTargetLowering()),
+ IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
bool hasBranchDivergence() { return true; }
@@ -76,6 +78,17 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+
unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost(
@@ -91,6 +104,15 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
bool isSourceOfDivergence(const Value *V) const;
+ unsigned getFlatAddressSpace() const {
+ // Don't bother running InferAddressSpaces pass on graphics shaders which
+ // don't use flat addressing.
+ if (IsGraphicsShader)
+ return -1;
+ return ST->hasFlatAddressSpace() ?
+ ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
+ }
+
unsigned getVectorSplitCost() { return 0; }
};
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
new file mode 100644
index 000000000000..309913f87fb6
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -0,0 +1,225 @@
+//===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a variant of the UnifyDivergentExitNodes pass. Rather than ensuring
+// there is at most one ret and one unreachable instruction, it ensures there is
+// at most one divergent exiting block.
+//
+// StructurizeCFG can't deal with multi-exit regions formed by branches to
+// multiple return nodes. It is not desirable to structurize regions with
+// uniform branches, so unifying those to the same return block as divergent
+// branches inhibits use of scalar branching. It still can't deal with the case
+// where one branch goes to return, and one unreachable. Replace unreachable in
+// this case with a return.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
+
+namespace {
+
+class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
+ initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry());
+ }
+
+ // We can preserve non-critical-edgeness when we unify function exit nodes
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+
+}
+
+char AMDGPUUnifyDivergentExitNodes::ID = 0;
+INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
+ "Unify divergent function exit nodes", false, false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
+ "Unify divergent function exit nodes", false, false)
+
+char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
+
+void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // TODO: Preserve dominator tree.
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+
+ AU.addRequired<DivergenceAnalysis>();
+
+ // No divergent values are changed, only blocks and branch edges.
+ AU.addPreserved<DivergenceAnalysis>();
+
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+
+ // This is a cluster of orthogonal Transforms
+ AU.addPreservedID(LowerSwitchID);
+ FunctionPass::getAnalysisUsage(AU);
+
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+}
+
+/// \returns true if \p BB is reachable through only uniform branches.
+/// XXX - Is there a more efficient way to find this?
+static bool isUniformlyReached(const DivergenceAnalysis &DA,
+ BasicBlock &BB) {
+ SmallVector<BasicBlock *, 8> Stack;
+ SmallPtrSet<BasicBlock *, 8> Visited;
+
+ for (BasicBlock *Pred : predecessors(&BB))
+ Stack.push_back(Pred);
+
+ while (!Stack.empty()) {
+ BasicBlock *Top = Stack.pop_back_val();
+ if (!DA.isUniform(Top->getTerminator()))
+ return false;
+
+ for (BasicBlock *Pred : predecessors(Top)) {
+ if (Visited.insert(Pred).second)
+ Stack.push_back(Pred);
+ }
+ }
+
+ return true;
+}
+
+static BasicBlock *unifyReturnBlockSet(Function &F,
+ ArrayRef<BasicBlock *> ReturningBlocks,
+ const TargetTransformInfo &TTI,
+ StringRef Name) {
+  // We need to insert a new basic block into the function, add a PHI node
+  // (if the function returns values), and convert all of the return
+  // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
+
+ PHINode *PN = nullptr;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (BasicBlock *BB : ReturningBlocks) {
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+
+ for (BasicBlock *BB : ReturningBlocks) {
+ // Cleanup possible branch to unconditional branch to the return.
+ SimplifyCFG(BB, TTI, 2);
+ }
+
+ return NewRetBlock;
+}
+
+bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ if (PDT.getRoots().size() <= 1)
+ return false;
+
+ DivergenceAnalysis &DA = getAnalysis<DivergenceAnalysis>();
+
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ SmallVector<BasicBlock *, 4> ReturningBlocks;
+ SmallVector<BasicBlock *, 4> UnreachableBlocks;
+
+ for (BasicBlock *BB : PDT.getRoots()) {
+ if (isa<ReturnInst>(BB->getTerminator())) {
+ if (!isUniformlyReached(DA, *BB))
+ ReturningBlocks.push_back(BB);
+ } else if (isa<UnreachableInst>(BB->getTerminator())) {
+ if (!isUniformlyReached(DA, *BB))
+ UnreachableBlocks.push_back(BB);
+ }
+ }
+
+ if (!UnreachableBlocks.empty()) {
+ BasicBlock *UnreachableBlock = nullptr;
+
+ if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (BasicBlock *BB : UnreachableBlocks) {
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ if (!ReturningBlocks.empty()) {
+ // Don't create a new unreachable inst if we have a return. The
+ // structurizer/annotator can't handle the multiple exits
+
+ Type *RetTy = F.getReturnType();
+ Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+ UnreachableBlock->getInstList().pop_back(); // Remove the unreachable inst.
+
+ Function *UnreachableIntrin =
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::amdgcn_unreachable);
+
+ // Insert a call to an intrinsic tracking that this is an unreachable
+ // point, in case we want to kill the active lanes or something later.
+ CallInst::Create(UnreachableIntrin, {}, "", UnreachableBlock);
+
+ // Don't create a scalar trap. We would only want to trap if this code was
+ // really reached, but a scalar trap would happen even if no lanes
+ // actually reached here.
+ ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
+ ReturningBlocks.push_back(UnreachableBlock);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty())
+ return false; // No blocks return
+
+ if (ReturningBlocks.size() == 1)
+ return false; // Already has a single return block
+
+ const TargetTransformInfo &TTI
+ = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock");
+ return true;
+}
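
isUniformlyReached above is a backward walk: starting from the block's predecessors it follows predecessor edges and gives up as soon as any terminator along the way is divergent. The same traversal on a plain adjacency structure is sketched below, with a caller-supplied predicate standing in for DivergenceAnalysis; the Block type and all names are illustrative only.

#include <functional>
#include <unordered_set>
#include <vector>

// Minimal stand-in for a basic block: just its predecessor list.
struct Block {
  std::vector<Block *> Preds;
};

// Returns true if every block from which 'BB' can be reached ends in a
// uniform terminator, mirroring the walk in isUniformlyReached above.
bool uniformlyReached(Block &BB,
                      const std::function<bool(const Block &)> &IsUniformTerm) {
  std::vector<Block *> Stack(BB.Preds.begin(), BB.Preds.end());
  std::unordered_set<Block *> Visited;

  while (!Stack.empty()) {
    Block *Top = Stack.back();
    Stack.pop_back();
    if (!IsUniformTerm(*Top))
      return false;
    for (Block *Pred : Top->Preds)
      if (Visited.insert(Pred).second)
        Stack.push_back(Pred);
  }
  return true;
}
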
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
index bf501a1e8405..3a0c3ede08f4 100644
--- a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -13,38 +13,39 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
namespace {
+
namespace kOCLMD {
+
const char SpirVer[] = "opencl.spir.version";
const char OCLVer[] = "opencl.ocl.version";
const char UsedExt[] = "opencl.used.extensions";
const char UsedOptCoreFeat[] = "opencl.used.optional.core.features";
const char CompilerOptions[] = "opencl.compiler.options";
const char LLVMIdent[] = "llvm.ident";
- }
+
+ } // end namespace kOCLMD
/// \brief Unify multiple OpenCL metadata due to linking.
- class AMDGPUUnifyMetadata : public FunctionPass {
+ class AMDGPUUnifyMetadata : public ModulePass {
public:
static char ID;
- explicit AMDGPUUnifyMetadata() : FunctionPass(ID) {};
+ explicit AMDGPUUnifyMetadata() : ModulePass(ID) {};
private:
- // This should really be a module pass but we have to run it as early
- // as possible, so given function passes are executed first and
- // TargetMachine::addEarlyAsPossiblePasses() expects only function passes
- // it has to be a function pass.
virtual bool runOnModule(Module &M);
- // \todo: Convert to a module pass.
- virtual bool runOnFunction(Function &F);
-
/// \brief Unify version metadata.
/// \return true if changes are made.
/// Assume the named metadata has operands each of which is a pair of
@@ -117,7 +118,7 @@ INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata",
"Unify multiple OpenCL metadata due to linking",
false, false)
-FunctionPass* llvm::createAMDGPUUnifyMetadataPass() {
+ModulePass* llvm::createAMDGPUUnifyMetadataPass() {
return new AMDGPUUnifyMetadata();
}
@@ -143,7 +144,3 @@ bool AMDGPUUnifyMetadata::runOnModule(Module &M) {
return Changed;
}
-
-bool AMDGPUUnifyMetadata::runOnFunction(Function &F) {
- return runOnModule(*F.getParent());
-}
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 7faeccdc5df3..1a393845a822 100644
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -9,27 +9,40 @@
//==-----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
+#include "R600RegisterInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstddef>
#include <deque>
+#include <iterator>
+#include <map>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -53,15 +66,19 @@ STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
namespace llvm {
+
void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
-}
+
+} // end namespace llvm
+
+namespace {
//===----------------------------------------------------------------------===//
//
// Miscellaneous utility for CFGStructurizer.
//
//===----------------------------------------------------------------------===//
-namespace {
+
#define SHOWNEWINSTR(i) \
DEBUG(dbgs() << "New instr: " << *i << "\n");
@@ -82,35 +99,19 @@ DEBUG( \
#define INVALIDSCCNUM -1
-template<class NodeT>
-void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
- size_t sz = Src.size();
- for (size_t i = 0; i < sz/2; ++i) {
- NodeT *t = Src[i];
- Src[i] = Src[sz - i - 1];
- Src[sz - i - 1] = t;
- }
-}
-
-} // end anonymous namespace
-
//===----------------------------------------------------------------------===//
//
// supporting data structure for CFGStructurizer
//
//===----------------------------------------------------------------------===//
-
-namespace {
-
class BlockInformation {
public:
- bool IsRetired;
- int SccNum;
- BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
-};
+ bool IsRetired = false;
+ int SccNum = INVALIDSCCNUM;
-} // end anonymous namespace
+ BlockInformation() = default;
+};
//===----------------------------------------------------------------------===//
//
@@ -118,7 +119,6 @@ public:
//
//===----------------------------------------------------------------------===//
-namespace {
class AMDGPUCFGStructurizer : public MachineFunctionPass {
public:
typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
@@ -133,8 +133,7 @@ public:
static char ID;
- AMDGPUCFGStructurizer() :
- MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
+ AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
}
@@ -167,7 +166,7 @@ public:
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
MDT = &getAnalysis<MachineDominatorTree>();
- DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
+ DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
PDT = &getAnalysis<MachinePostDominatorTree>();
DEBUG(PDT->print(dbgs()););
prepare();
@@ -180,8 +179,8 @@ protected:
MachineDominatorTree *MDT;
MachinePostDominatorTree *PDT;
MachineLoopInfo *MLI;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
+ const R600InstrInfo *TII = nullptr;
+ const R600RegisterInfo *TRI = nullptr;
// PRINT FUNCTIONS
/// Print the ordered Blocks.
@@ -198,6 +197,7 @@ protected:
}
}
}
+
static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
for (MachineLoop::iterator iter = LoopInfo.begin(),
iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
@@ -263,7 +263,6 @@ protected:
MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
static void wrapup(MachineBasicBlock *MBB);
-
int patternMatch(MachineBasicBlock *MBB);
int patternMatchGroup(MachineBasicBlock *MBB);
int serialPatternMatch(MachineBasicBlock *MBB);
@@ -328,7 +327,6 @@ protected:
void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
void retireBlock(MachineBasicBlock *MBB);
-
private:
MBBInfoMap BlockInfoMap;
LoopLandInfoMap LLInfoMap;
@@ -337,6 +335,10 @@ private:
SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
};
+char AMDGPUCFGStructurizer::ID = 0;
+
+} // end anonymous namespace
+
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
if (It == BlockInfoMap.end())
@@ -379,6 +381,7 @@ bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
}
return false;
}
+
AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
bool AllowSideEntry) const {
@@ -697,10 +700,8 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
// (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
// there isn't such an interface yet. alternatively, replace all the other
// blocks in the jump table with the entryBlk //}
-
}
-
bool AMDGPUCFGStructurizer::prepare() {
bool Changed = false;
@@ -748,7 +749,6 @@ bool AMDGPUCFGStructurizer::prepare() {
}
bool AMDGPUCFGStructurizer::run() {
-
//Assume reducible CFG...
DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
@@ -886,8 +886,6 @@ bool AMDGPUCFGStructurizer::run() {
return true;
}
-
-
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
int SccNum = 0;
MachineBasicBlock *MBB;
@@ -903,11 +901,8 @@ void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
}
}
- //walk through all the block in func to check for unreachable
- typedef GraphTraits<MachineFunction *> GTM;
- auto It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
- for (; It != E; ++It) {
- MachineBasicBlock *MBB = *It;
+  // walk through all the blocks in the function to check for unreachable ones
+ for (auto *MBB : nodes(MF)) {
SccNum = getSCCNum(MBB);
if (SccNum == INVALIDSCCNUM)
dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
@@ -941,7 +936,6 @@ int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
return NumMatch;
}
-
int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
if (MBB->succ_size() != 1)
return 0;
@@ -1039,7 +1033,7 @@ int AMDGPUCFGStructurizer::loopendPatternMatch() {
for (MachineLoop *ML : depth_first(It))
NestedLoops.push_front(ML);
- if (NestedLoops.size() == 0)
+ if (NestedLoops.empty())
return 0;
// Process nested loop outside->inside (we did push_front),
@@ -1074,13 +1068,9 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
MachineBasicBlock *ExitBlk = *ExitBlks.begin();
assert(ExitBlk && "Loop has several exit block");
MBBVector LatchBlks;
- typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
- InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
- PE = InvMBBTraits::child_end(LoopHeader);
- for (; PI != PE; PI++) {
- if (LoopRep->contains(*PI))
- LatchBlks.push_back(*PI);
- }
+ for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
+ if (LoopRep->contains(LB))
+ LatchBlks.push_back(LB);
for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
@@ -1217,7 +1207,7 @@ void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
}
}
- dbgs() << "\n";
+ dbgs() << "\n";
}
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
@@ -1478,7 +1468,6 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
if (LandMBB && TrueMBB && FalseMBB)
MBB->addSuccessor(LandMBB);
-
}
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
@@ -1491,7 +1480,6 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
-
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
MachineBasicBlock *LandMBB) {
DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
@@ -1727,11 +1715,6 @@ void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
&& "can't retire block yet");
}
-char AMDGPUCFGStructurizer::ID = 0;
-
-} // end anonymous namespace
-
-
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
"AMDGPU CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3cf9a1d92469..961f7186f373 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -16,6 +16,7 @@
#include "Utils/AMDGPUAsmUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,15 +40,12 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -56,7 +54,6 @@
#include <map>
#include <memory>
#include <string>
-#include <vector>
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -83,7 +80,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
const AMDGPUAsmParser *AsmParser;
public:
- AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
+ AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
: MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
typedef std::unique_ptr<AMDGPUOperand> Ptr;
@@ -160,7 +157,11 @@ public:
ImmTySendMsg,
ImmTyInterpSlot,
ImmTyInterpAttr,
- ImmTyAttrChan
+ ImmTyAttrChan,
+ ImmTyOpSel,
+ ImmTyOpSelHi,
+ ImmTyNegLo,
+ ImmTyNegHi
};
struct TokOp {
@@ -297,6 +298,10 @@ public:
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
+ bool isOpSel() const { return isImmTy(ImmTyOpSel); }
+ bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
+ bool isNegLo() const { return isImmTy(ImmTyNegLo); }
+ bool isNegHi() const { return isImmTy(ImmTyNegHi); }
bool isMod() const {
return isClampSI() || isOModSI();
@@ -316,6 +321,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}
+ bool isSCSrcV2B16() const {
+ return isSCSrcB16();
+ }
+
bool isSCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
}
@@ -328,6 +337,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}
+ bool isSCSrcV2F16() const {
+ return isSCSrcF16();
+ }
+
bool isSCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
}
@@ -344,6 +357,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::i16);
}
+ bool isSSrcV2B16() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcB16();
+ }
+
bool isSSrcB64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
@@ -362,6 +380,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::f16);
}
+ bool isSSrcV2F16() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcF16();
+ }
+
bool isVCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -374,6 +397,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
+ bool isVCSrcV2B16() const {
+ return isVCSrcB16();
+ }
+
bool isVCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
@@ -386,6 +413,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
+ bool isVCSrcV2F16() const {
+ return isVCSrcF16();
+ }
+
bool isVSrcB32() const {
return isVCSrcF32() || isLiteralImm(MVT::i32);
}
@@ -398,6 +429,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::i16);
}
+ bool isVSrcV2B16() const {
+ llvm_unreachable("cannot happen");
+ return isVSrcB16();
+ }
+
bool isVSrcF32() const {
return isVCSrcF32() || isLiteralImm(MVT::f32);
}
@@ -410,6 +446,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::f16);
}
+ bool isVSrcV2F16() const {
+ llvm_unreachable("cannot happen");
+ return isVSrcF16();
+ }
+
bool isKImmFP32() const {
return isLiteralImm(MVT::f32);
}
@@ -459,7 +500,7 @@ public:
return Imm.Val;
}
- enum ImmTy getImmTy() const {
+ ImmTy getImmTy() const {
assert(isImm());
return Imm.Type;
}
@@ -501,9 +542,11 @@ public:
return getModifiers().hasIntModifiers();
}
+ uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
+
void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
- void addLiteralImmOperand(MCInst &Inst, int64_t Val) const;
+ void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
template <unsigned Bitwidth>
void addKImmFPOperands(MCInst &Inst, unsigned N) const;
@@ -610,6 +653,10 @@ public:
case ImmTyInterpSlot: OS << "InterpSlot"; break;
case ImmTyInterpAttr: OS << "InterpAttr"; break;
case ImmTyAttrChan: OS << "AttrChan"; break;
+ case ImmTyOpSel: OS << "OpSel"; break;
+ case ImmTyOpSelHi: OS << "OpSelHi"; break;
+ case ImmTyNegLo: OS << "NegLo"; break;
+ case ImmTyNegHi: OS << "NegHi"; break;
}
}
@@ -636,7 +683,7 @@ public:
static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
int64_t Val, SMLoc Loc,
- enum ImmTy Type = ImmTyNone,
+ ImmTy Type = ImmTyNone,
bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
Op->Imm.Val = Val;
@@ -695,9 +742,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
- int SgprIndexUnusedMin;
- int VgprIndexUnusedMin;
- MCContext *Ctx;
+ int SgprIndexUnusedMin = -1;
+ int VgprIndexUnusedMin = -1;
+ MCContext *Ctx = nullptr;
void usesSgprAt(int i) {
if (i >= SgprIndexUnusedMin) {
@@ -708,6 +755,7 @@ class KernelScopeInfo {
}
}
}
+
void usesVgprAt(int i) {
if (i >= VgprIndexUnusedMin) {
VgprIndexUnusedMin = ++i;
@@ -717,14 +765,16 @@ class KernelScopeInfo {
}
}
}
+
public:
- KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
- {}
+ KernelScopeInfo() = default;
+
void initialize(MCContext &Context) {
Ctx = &Context;
usesSgprAt(SgprIndexUnusedMin = -1);
usesVgprAt(VgprIndexUnusedMin = -1);
}
+
void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
switch (RegKind) {
case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
@@ -738,9 +788,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
const MCInstrInfo &MII;
MCAsmParser &Parser;
- unsigned ForcedEncodingSize;
- bool ForcedDPP;
- bool ForcedSDWA;
+ unsigned ForcedEncodingSize = 0;
+ bool ForcedDPP = false;
+ bool ForcedSDWA = false;
KernelScopeInfo KernelScope;
/// @name Auto-generated Match Functions
@@ -756,7 +806,7 @@ private:
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
- bool ParseDirectiveRuntimeMetadata();
+ bool ParseDirectiveCodeObjectMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
bool ParseSectionDirectiveHSAText();
@@ -767,44 +817,52 @@ private:
bool ParseSectionDirectiveHSADataGlobalAgent();
bool ParseSectionDirectiveHSADataGlobalProgram();
bool ParseSectionDirectiveHSARodataReadonlyAgent();
- bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
- bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
- void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);
+ bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
+ RegisterKind RegKind, unsigned Reg1,
+ unsigned RegNum);
+ bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
+ unsigned& RegNum, unsigned& RegWidth,
+ unsigned *DwordRegIndex);
+ void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
+ bool IsAtomic, bool IsAtomicReturn);
+ void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
+ bool IsGdsHardcoded);
public:
enum AMDGPUMatchResultTy {
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
};
+ typedef std::map<AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
+
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
- ForcedEncodingSize(0),
- ForcedDPP(false),
- ForcedSDWA(false) {
+ : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
MCAsmParserExtension::Initialize(Parser);
- if (getSTI().getFeatureBits().none()) {
+ if (getFeatureBits().none()) {
// Set default features.
copySTI().ToggleFeature("SOUTHERN_ISLANDS");
}
- setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
{
// TODO: make those pre-defined variables read-only.
// Currently there is none suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
MCContext &Ctx = getContext();
- MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx));
+ MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
KernelScope.initialize(getContext());
}
@@ -822,7 +880,7 @@ public:
}
bool hasInv2PiInlineImm() const {
- return getSTI().getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
+ return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
bool hasSGPR102_SGPR103() const {
@@ -844,6 +902,10 @@ public:
return &MII;
}
+ const FeatureBitset &getFeatureBits() const {
+ return getSTI().getFeatureBits();
+ }
+
void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
@@ -871,19 +933,28 @@ public:
//bool ProcessInstruction(MCInst &Inst);
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
+
OperandMatchResultTy
parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t &) = nullptr);
+
+ OperandMatchResultTy parseOperandArrayWithPrefix(
+ const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ bool (*ConvertResult)(int64_t&) = nullptr);
+
OperandMatchResultTy
parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
StringRef &Value);
- OperandMatchResultTy parseImm(OperandVector &Operands);
+ bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
+ OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
OperandMatchResultTy parseReg(OperandVector &Operands);
- OperandMatchResultTy parseRegOrImm(OperandVector &Operands);
+ OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
@@ -891,7 +962,8 @@ public:
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
- void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
+ void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
void cvtExp(MCInst &Inst, const OperandVector &Operands);
bool parseCnt(int64_t &IntVal);
@@ -911,6 +983,12 @@ private:
void errorExpTgt();
OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
+ bool validateOperandLimitations(const MCInst &Inst);
+ bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
+ bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
+ unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
+ bool isSGPR(unsigned Reg);
+
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@@ -940,7 +1018,13 @@ public:
void cvtId(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
+
+ void cvtVOP3Impl(MCInst &Inst,
+ const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
@@ -988,6 +1072,30 @@ static const fltSemantics *getFltSemantics(MVT VT) {
return getFltSemantics(VT.getSizeInBits() / 8);
}
+static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
+ switch (OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ return &APFloat::IEEEsingle();
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return &APFloat::IEEEdouble();
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ return &APFloat::IEEEhalf();
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
@@ -1031,13 +1139,18 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
if (!canLosslesslyConvertToFPType(FPLiteral, type))
return false;
+ if (type.getScalarSizeInBits() == 16) {
+ return AMDGPU::isInlinableLiteral16(
+ static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+ }
+
// Check if single precision literal is inlinable
return AMDGPU::isInlinableLiteral32(
static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
AsmParser->hasInv2PiInlineImm());
}
-
// We got int literal token.
if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
return AMDGPU::isInlinableLiteral64(Imm.Val,
@@ -1064,6 +1177,13 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
if (!Imm.IsFPImm) {
// We got int literal token.
+ if (type == MVT::f64 && hasFPModifiers()) {
+ // Cannot apply fp modifiers to int literals preserving the same semantics
+ // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
+ // disable these cases.
+ return false;
+ }
+
unsigned Size = type.getSizeInBits();
if (Size == 64)
Size = 32;
@@ -1093,40 +1213,57 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
-void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
- int64_t Val = Imm.Val;
- if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) {
- // Apply modifiers to immediate value. Only negate can get here
- if (Imm.IsFPImm) {
- APFloat F(BitsToDouble(Val));
- F.changeSign();
- Val = F.bitcastToAPInt().getZExtValue();
- } else {
- Val = -Val;
- }
+uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
+{
+ assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
+ assert(Size == 2 || Size == 4 || Size == 8);
+
+ const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
+
+ if (Imm.Mods.Abs) {
+ Val &= ~FpSignMask;
}
+ if (Imm.Mods.Neg) {
+ Val ^= FpSignMask;
+ }
+
+ return Val;
+}
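
applyInputFPModifiers above treats the abs and neg input modifiers purely as sign-bit operations on the raw operand encoding. A minimal standalone C++ sketch of that arithmetic for the 32-bit (Size == 4) case, with the value chosen only for illustration:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t FpSignMask = 1ULL << 31;             // Size == 4 case above
  const uint64_t NegOneBits = 0xBF800000;             // bit pattern of -1.0f
  assert((NegOneBits & ~FpSignMask) == 0x3F800000);   // abs: clear sign -> 1.0f
  assert((NegOneBits ^ FpSignMask) == 0x3F800000);    // neg: flip sign  -> 1.0f
  return 0;
}
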
+
+void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
Inst.getNumOperands())) {
- addLiteralImmOperand(Inst, Val);
+ addLiteralImmOperand(Inst, Imm.Val,
+ ApplyModifiers &
+ isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
} else {
- Inst.addOperand(MCOperand::createImm(Val));
+ assert(!isImmTy(ImmTyNone) || !hasModifiers());
+ Inst.addOperand(MCOperand::createImm(Imm.Val));
}
}
-void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
+void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
auto OpNum = Inst.getNumOperands();
// Check that this operand accepts literals
assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
- auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
+ if (ApplyModifiers) {
+ assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
+ const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
+ Val = applyInputFPModifiers(Val, Size);
+ }
+
+ APInt Literal(64, Val);
+ uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
if (Imm.IsFPImm) { // We got fp literal token
- APInt Literal(64, Val);
-
- switch (OpSize) {
- case 8: {
+ switch (OpTy) {
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1151,16 +1288,31 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// in predicate methods (isLiteralImm())
llvm_unreachable("fp literal in 64-bit integer instruction.");
}
- case 4:
- case 2: {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
- FPLiteral.convert(*getFltSemantics(OpSize),
+ FPLiteral.convert(*getOpFltSemantics(OpTy),
APFloat::rmNearestTiesToEven, &lost);
// We allow precision lost but not overflow or underflow. This should be
// checked earlier in isLiteralImm()
- Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+
+ uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+ if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
+ OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ ImmVal |= (ImmVal << 16);
+ }
+
+ Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
default:
@@ -1173,8 +1325,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// We got int literal token.
// Only sign extend inline immediates.
// FIXME: No errors on truncation
- switch (OpSize) {
- case 4: {
+ switch (OpTy) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
if (isInt<32>(Val) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -1185,9 +1340,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
return;
}
- case 8: {
- if (AMDGPU::isInlinableLiteral64(Val,
- AsmParser->hasInv2PiInlineImm())) {
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
+ if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
@@ -1195,7 +1352,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
return;
}
- case 2: {
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
if (isInt<16>(Val) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -1206,6 +1366,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
return;
}
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
+ assert(AMDGPU::isInlinableLiteral16(LiteralVal,
+ AsmParser->hasInv2PiInlineImm()));
+
+ uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
+ static_cast<uint32_t>(LiteralVal);
+ Inst.addOperand(MCOperand::createImm(ImmVal));
+ return;
+ }
default:
llvm_unreachable("invalid operand size");
}
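
The V2INT16/V2FP16 cases above (in both the fp-literal and the int-literal paths) encode a single 16-bit inline value into both halves of a 32-bit immediate. A small standalone illustration of that replication, with the half-precision value chosen only as an example:

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t Half = 0x3C00;                        // 1.0 in IEEE half
  const uint32_t Packed = (uint32_t(Half) << 16) | Half;
  assert(Packed == 0x3C003C00);                        // both lanes hold 1.0
  return 0;
}
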
@@ -1289,7 +1460,8 @@ static unsigned getSpecialRegForName(StringRef RegName) {
.Default(0);
}
-bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
auto R = parseRegister();
if (!R) return true;
assert(R->isReg());
@@ -1299,20 +1471,43 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
return false;
}
-bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum)
-{
+bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
+ RegisterKind RegKind, unsigned Reg1,
+ unsigned RegNum) {
switch (RegKind) {
case IS_SPECIAL:
- if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; RegWidth = 2; return true; }
- if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; RegWidth = 2; return true; }
- if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; return true; }
- if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; RegWidth = 2; return true; }
- if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; RegWidth = 2; return true; }
+ if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
+ Reg = AMDGPU::EXEC;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
+ Reg = AMDGPU::FLAT_SCR;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
+ Reg = AMDGPU::VCC;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
+ Reg = AMDGPU::TBA;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
+ Reg = AMDGPU::TMA;
+ RegWidth = 2;
+ return true;
+ }
return false;
case IS_VGPR:
case IS_SGPR:
case IS_TTMP:
- if (Reg1 != Reg + RegWidth) { return false; }
+ if (Reg1 != Reg + RegWidth) {
+ return false;
+ }
RegWidth++;
return true;
default:
@@ -1320,8 +1515,9 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R
}
}
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
-{
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+ unsigned &RegNum, unsigned &RegWidth,
+ unsigned *DwordRegIndex) {
if (DwordRegIndex) { *DwordRegIndex = 0; }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
if (getLexer().is(AsmToken::Identifier)) {
@@ -1462,8 +1658,33 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
+bool
+AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
+ if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
+ (getLexer().getKind() == AsmToken::Integer ||
+ getLexer().getKind() == AsmToken::Real)) {
+
+ // This is a workaround for handling operands like these:
+ // |1.0|
+ // |-1|
+  // This syntax is not compatible with the syntax of standard
+ // MC expressions (due to the trailing '|').
+
+ SMLoc EndLoc;
+ const MCExpr *Expr;
+
+ if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
+ return true;
+ }
+
+ return !Expr->evaluateAsAbsolute(Val);
+ }
+
+ return getParser().parseAbsoluteExpression(Val);
+}
+
OperandMatchResultTy
-AMDGPUAsmParser::parseImm(OperandVector &Operands) {
+AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
// TODO: add syntactic sugar for 1/(2*PI)
bool Minus = false;
if (getLexer().getKind() == AsmToken::Minus) {
@@ -1475,7 +1696,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
switch(getLexer().getKind()) {
case AsmToken::Integer: {
int64_t IntVal;
- if (getParser().parseAbsoluteExpression(IntVal))
+ if (parseAbsoluteExpr(IntVal, AbsMod))
return MatchOperand_ParseFail;
if (Minus)
IntVal *= -1;
@@ -1484,7 +1705,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
}
case AsmToken::Real: {
int64_t IntVal;
- if (getParser().parseAbsoluteExpression(IntVal))
+ if (parseAbsoluteExpr(IntVal, AbsMod))
return MatchOperand_ParseFail;
APFloat F(BitsToDouble(IntVal));
@@ -1512,8 +1733,8 @@ AMDGPUAsmParser::parseReg(OperandVector &Operands) {
}
OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
- auto res = parseImm(Operands);
+AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
+ auto res = parseImm(Operands, AbsMod);
if (res != MatchOperand_NoMatch) {
return res;
}
@@ -1522,18 +1743,50 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
}
OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm) {
- // XXX: During parsing we can't determine if minus sign means
- // negate-modifier or negative immediate value.
- // By default we suppose it is modifier.
- bool Negate = false, Abs = false, Abs2 = false;
+AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
+ bool AllowImm) {
+ bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
if (getLexer().getKind()== AsmToken::Minus) {
+ const AsmToken NextToken = getLexer().peekTok();
+
+ // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
+ if (NextToken.is(AsmToken::Minus)) {
+ Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
+ return MatchOperand_ParseFail;
+ }
+
+ // '-' followed by an integer literal N should be interpreted as integer
+ // negation rather than a floating-point NEG modifier applied to N.
+  // Besides being counter-intuitive, such use of the floating-point NEG modifier
+  // results in different meanings of integer literals used with VOP1/2/C
+ // and VOP3, for example:
+ // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
+ // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
+  // Negative fp literals should be handled likewise for uniformity.
+ if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
+ Parser.Lex();
+ Negate = true;
+ }
+ }
+
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == "neg") {
+ if (Negate) {
+ Error(Parser.getTok().getLoc(), "expected register or immediate");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ Negate2 = true;
+ if (getLexer().isNot(AsmToken::LParen)) {
+ Error(Parser.getTok().getLoc(), "expected left paren after neg");
+ return MatchOperand_ParseFail;
+ }
Parser.Lex();
- Negate = true;
}
- if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "abs") {
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == "abs") {
Parser.Lex();
Abs2 = true;
if (getLexer().isNot(AsmToken::LParen)) {
@@ -1554,7 +1807,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
OperandMatchResultTy Res;
if (AllowImm) {
- Res = parseRegOrImm(Operands);
+ Res = parseRegOrImm(Operands, Abs);
} else {
Res = parseReg(Operands);
}
@@ -1563,9 +1816,6 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
}
AMDGPUOperand::Modifiers Mods;
- if (Negate) {
- Mods.Neg = true;
- }
if (Abs) {
if (getLexer().getKind() != AsmToken::Pipe) {
Error(Parser.getTok().getLoc(), "expected vertical bar");
@@ -1583,6 +1833,17 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
Mods.Abs = true;
}
+ if (Negate) {
+ Mods.Neg = true;
+ } else if (Negate2) {
+ if (getLexer().isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "expected closing parentheses");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ Mods.Neg = true;
+ }
+
if (Mods.hasFPModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
Op.setModifiers(Mods);
@@ -1591,10 +1852,12 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
}
OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm) {
+AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
+ bool AllowImm) {
bool Sext = false;
- if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "sext") {
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == "sext") {
Parser.Lex();
Sext = true;
if (getLexer().isNot(AsmToken::LParen)) {
@@ -1661,7 +1924,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands)
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
-
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
@@ -1719,6 +1981,128 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
return makeArrayRef(Variants);
}
+unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ const unsigned Num = Desc.getNumImplicitUses();
+ for (unsigned i = 0; i < Num; ++i) {
+ unsigned Reg = Desc.ImplicitUses[i];
+ switch (Reg) {
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::VCC:
+ case AMDGPU::M0:
+ return Reg;
+ default:
+ break;
+ }
+ }
+ return AMDGPU::NoRegister;
+}
+
+bool AMDGPUAsmParser::isSGPR(unsigned Reg) {
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
+ const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+ return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
+ Reg == AMDGPU::SCC;
+}
+
+// NB: This code is correct only when used to check constant
+// bus limitations because GFX7 supports no f16 inline constants.
+// Note that there are no cases when a GFX7 opcode violates
+// constant bus limitations due to the use of an f16 constant.
+bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
+ unsigned OpIdx) const {
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+
+ if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
+ return false;
+ }
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+
+ int64_t Val = MO.getImm();
+ auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
+
+ switch (OpSize) { // expected operand size
+ case 8:
+ return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
+ case 4:
+ return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
+ case 2: {
+ const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
+ if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
+ } else {
+ return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
+ }
+ }
+ default:
+ llvm_unreachable("invalid operand size");
+ }
+}
+
+bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (MO.isImm()) {
+ return !isInlineConstant(Inst, OpIdx);
+ }
+ return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg()));
+}
+
+bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+ unsigned ConstantBusUseCount = 0;
+
+ if (Desc.TSFlags &
+ (SIInstrFlags::VOPC |
+ SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
+ SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) {
+
+ // Check special imm operands (used by madmk, etc)
+ if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
+ ++ConstantBusUseCount;
+ }
+
+ unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
+ if (SGPRUsed != AMDGPU::NoRegister) {
+ ++ConstantBusUseCount;
+ }
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1) break;
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (usesConstantBus(Inst, OpIdx)) {
+ if (MO.isReg()) {
+ const unsigned Reg = mc2PseudoReg(MO.getReg());
+        // Pairs of registers with a partial intersection like these
+ // s0, s[0:1]
+ // flat_scratch_lo, flat_scratch
+ // flat_scratch_lo, flat_scratch_hi
+ // are theoretically valid but they are disabled anyway.
+ // Note that this code mimics SIInstrInfo::verifyInstruction
+ if (Reg != SGPRUsed) {
+ ++ConstantBusUseCount;
+ }
+ SGPRUsed = Reg;
+ } else { // Expression or a literal
+ ++ConstantBusUseCount;
+ }
+ }
+ }
+ }
+
+ return ConstantBusUseCount <= 1;
+}
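
validateOperandLimitations above enforces the VOP constant-bus rule: across an instruction's source operands, at most one value may travel over the constant bus, counting non-inline literals, implicit SGPR reads (VCC, M0, FLAT_SCR), and each distinct explicit SGPR once. A simplified standalone sketch of that counting, using a hypothetical SimpleOp model rather than the real MC classes (the special madmk-style imm operand is omitted here):

#include <set>
#include <vector>

struct SimpleOp {                    // hypothetical, not an LLVM MC type
  bool IsLiteral = false;            // non-inlinable literal constant
  bool IsSGPR = false;               // scalar register operand
  unsigned Reg = 0;                  // register id when IsSGPR is set
};

static bool fitsConstantBus(const std::vector<SimpleOp> &Srcs) {
  unsigned Uses = 0;
  std::set<unsigned> SGPRsSeen;
  for (const SimpleOp &Op : Srcs) {
    if (Op.IsLiteral)
      ++Uses;                                        // every literal costs a slot
    else if (Op.IsSGPR && SGPRsSeen.insert(Op.Reg).second)
      ++Uses;                                        // each distinct SGPR costs a slot
  }
  return Uses <= 1;                                  // mirrors ConstantBusUseCount <= 1
}

int main() {
  std::vector<SimpleOp> TwoSGPRs(2);
  TwoSGPRs[0].IsSGPR = true; TwoSGPRs[0].Reg = 0;    // e.g. s0
  TwoSGPRs[1].IsSGPR = true; TwoSGPRs[1].Reg = 1;    // e.g. s1
  return fitsConstantBus(TwoSGPRs) ? 1 : 0;          // returns 0: two SGPRs exceed the bus
}
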
+
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -1751,6 +2135,10 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (Result) {
default: break;
case Match_Success:
+ if (!validateOperandLimitations(Inst)) {
+ return Error(IDLoc,
+ "invalid operand (violates constant bus restrictions)");
+ }
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst, getSTI());
return false;
@@ -1793,7 +2181,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
return false;
}
-
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
uint32_t &Minor) {
if (ParseAsAbsoluteExpression(Major))
@@ -1810,7 +2197,6 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
}
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
-
uint32_t Major;
uint32_t Minor;
@@ -1831,9 +2217,10 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
- getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
- Isa.Stepping,
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
+ ISA.Stepping,
"AMD", "AMDGPU");
return false;
}
@@ -1873,42 +2260,45 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
-bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
- std::string Metadata;
- raw_string_ostream MS(Metadata);
+bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
+ std::string YamlString;
+ raw_string_ostream YamlStream(YamlString);
getLexer().setSkipSpace(false);
bool FoundEnd = false;
while (!getLexer().is(AsmToken::Eof)) {
while (getLexer().is(AsmToken::Space)) {
- MS << ' ';
+ YamlStream << getLexer().getTok().getString();
Lex();
}
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
- if (ID == ".end_amdgpu_runtime_metadata") {
+ if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
Lex();
FoundEnd = true;
break;
}
}
- MS << Parser.parseStringToEndOfStatement()
- << getContext().getAsmInfo()->getSeparatorString();
+ YamlStream << Parser.parseStringToEndOfStatement()
+ << getContext().getAsmInfo()->getSeparatorString();
Parser.eatToEndOfStatement();
}
getLexer().setSkipSpace(true);
- if (getLexer().is(AsmToken::Eof) && !FoundEnd)
- return TokError("expected directive .end_amdgpu_runtime_metadata not found");
+ if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
+ return TokError(
+ "expected directive .end_amdgpu_code_object_metadata not found");
+ }
- MS.flush();
+ YamlStream.flush();
- getTargetStreamer().EmitRuntimeMetadata(Metadata);
+ if (!getTargetStreamer().EmitCodeObjectMetadata(YamlString))
+ return Error(getParser().getTok().getLoc(), "invalid code object metadata");
return false;
}
@@ -1926,7 +2316,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -2020,8 +2410,8 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();
- if (IDVal == ".amdgpu_runtime_metadata")
- return ParseDirectiveRuntimeMetadata();
+ if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin)
+ return ParseDirectiveCodeObjectMetadata();
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
@@ -2080,7 +2470,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
-
// Try to parse with a custom parser
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -2208,7 +2597,7 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy,
+ AMDGPUOperand::ImmTy ImmTy,
bool (*ConvertResult)(int64_t&)) {
SMLoc S = Parser.getTok().getLoc();
int64_t Value = 0;
@@ -2225,9 +2614,59 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
return MatchOperand_Success;
}
+OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
+ const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy,
+ bool (*ConvertResult)(int64_t&)) {
+ StringRef Name = Parser.getTok().getString();
+ if (!Name.equals(Prefix))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+
+ unsigned Val = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ // FIXME: How to verify the number of elements matches the number of src
+ // operands?
+ for (int I = 0; I < 3; ++I) {
+ if (I != 0) {
+ if (getLexer().is(AsmToken::RBrac))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ }
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return MatchOperand_ParseFail;
+
+ int64_t Op;
+ if (getParser().parseAbsoluteExpression(Op))
+ return MatchOperand_ParseFail;
+
+ if (Op != 0 && Op != 1)
+ return MatchOperand_ParseFail;
+ Val |= (Op << I);
+ }
+
+ Parser.Lex();
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
+ return MatchOperand_Success;
+}
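
The loop above folds an operand array such as op_sel:[0,1,1] into a single immediate, with element I stored in bit I. A short worked example of that packing (element values chosen only for illustration):

#include <cassert>

int main() {
  const int Elems[3] = {0, 1, 1};    // as written in op_sel:[0,1,1]
  unsigned Val = 0;
  for (int I = 0; I < 3; ++I)
    Val |= Elems[I] << I;            // same packing as the parser loop
  assert(Val == 6);                  // 0b110
  return 0;
}
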
+
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy) {
+ AMDGPUOperand::ImmTy ImmTy) {
int64_t Bit = 0;
SMLoc S = Parser.getTok().getLoc();
@@ -2257,11 +2696,11 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
return MatchOperand_Success;
}
-typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
-
-void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
- OptionalImmIndexMap& OptionalIdx,
- enum AMDGPUOperand::ImmTy ImmT, int64_t Default = 0) {
+static void addOptionalImmOperand(
+ MCInst& Inst, const OperandVector& Operands,
+ AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
+ AMDGPUOperand::ImmTy ImmT,
+ int64_t Default = 0) {
auto i = OptionalIdx.find(ImmT);
if (i != OptionalIdx.end()) {
unsigned Idx = i->second;
@@ -2323,9 +2762,9 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
-void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
- std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
- bool GDSOnly = false;
+void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
+ bool IsGdsHardcoded) {
+ OptionalImmIndexMap OptionalIdx;
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -2337,7 +2776,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
}
if (Op.isToken() && Op.getToken() == "gds") {
- GDSOnly = true;
+ IsGdsHardcoded = true;
continue;
}
@@ -2346,9 +2785,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
-
- if (!GDSOnly) {
+ if (!IsGdsHardcoded) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
}
Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
@@ -2421,13 +2858,14 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
Parser.Lex();
- IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
if (CntName == "vmcnt")
- IntVal = encodeVmcnt(IV, IntVal, CntVal);
+ IntVal = encodeVmcnt(ISA, IntVal, CntVal);
else if (CntName == "expcnt")
- IntVal = encodeExpcnt(IV, IntVal, CntVal);
+ IntVal = encodeExpcnt(ISA, IntVal, CntVal);
else if (CntName == "lgkmcnt")
- IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
+ IntVal = encodeLgkmcnt(ISA, IntVal, CntVal);
else
return true;
@@ -2436,8 +2874,9 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
- int64_t Waitcnt = getWaitcntBitMask(IV);
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
@@ -2459,7 +2898,8 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
return MatchOperand_Success;
}
-bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width) {
+bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
+ int64_t &Width) {
using namespace llvm::AMDGPU::Hwreg;
if (Parser.getTok().getString() != "hwreg")
@@ -2520,8 +2960,7 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
return false;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
+OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
using namespace llvm::AMDGPU::Hwreg;
int64_t Imm16Val = 0;
@@ -3170,6 +3609,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
{"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
+ {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
+ {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
+ {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
+ {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -3186,6 +3629,12 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan
res = parseSDWASel(Operands, Op.Name, Op.Type);
} else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
res = parseSDWADstUnused(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
+ Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
+ Op.Type == AMDGPUOperand::ImmTyNegLo ||
+ Op.Type == AMDGPUOperand::ImmTyNegHi) {
+ res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
+ Op.ConvertResult);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -3241,8 +3690,8 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
&& Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
-void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
+void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx) {
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@@ -3253,12 +3702,20 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
- } else if (Op.isImm()) {
+ } else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
+ } else if (Op.isRegOrImm()) {
+ Op.addRegOrImmOperands(Inst, 1);
} else {
llvm_unreachable("unhandled operand type");
}
}
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ cvtVOP3Impl(Inst, Operands, OptionalIdx);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
@@ -3283,6 +3740,96 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
}
}
+void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ unsigned I = 1;
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
+ ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
+ }
+
+ for (unsigned E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if (Op.isMod()) {
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ Op.addRegOrImmOperands(Inst, 1);
+ }
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+}
+
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptIdx;
+
+ cvtVOP3Impl(Inst, Operands, OptIdx);
+
+ // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
+ // instruction, and then figure out where to actually put the modifiers
+ int Opc = Inst.getOpcode();
+
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+
+ int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
+ if (NegLoIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
+ }
+
+ const int Ops[] = { AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2 };
+ const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers };
+
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+ unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+ unsigned NegLo = 0;
+ unsigned NegHi = 0;
+
+ if (NegLoIdx != -1) {
+ int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
+ NegLo = Inst.getOperand(NegLoIdx).getImm();
+ NegHi = Inst.getOperand(NegHiIdx).getImm();
+ }
+
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+ if (OpIdx == -1)
+ break;
+
+ uint32_t ModVal = 0;
+
+ if ((OpSel & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_0;
+
+ if ((OpSelHi & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_1;
+
+ if ((NegLo & (1 << J)) != 0)
+ ModVal |= SISrcMods::NEG;
+
+ if ((NegHi & (1 << J)) != 0)
+ ModVal |= SISrcMods::NEG_HI;
+
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+}
+
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
@@ -3436,7 +3983,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
- // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token.
+ // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
// Skip it.
continue;
} if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
@@ -3547,6 +4094,7 @@ void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
uint64_t BasicInstType) {
+ using namespace llvm::AMDGPU::SDWA;
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
@@ -3581,21 +4129,21 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
// V_NOP_sdwa_vi has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
break;
case SIInstrFlags::VOP2:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
case SIInstrFlags::VOPC:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
default:
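The cvtVOP3P conversion above parses op_sel, op_sel_hi, neg_lo and neg_hi as per-instruction bit masks and then redistributes bit J of each mask into source J's *_modifiers operand. Below is a minimal stand-alone sketch of that redistribution, not the in-tree code; the flag values are placeholders standing in for SISrcMods and are assumptions, not the real encodings.

// Sketch of the per-source modifier packing done by cvtVOP3P.
#include <cstdio>

namespace Flags {                 // assumed placeholder bit positions
  const unsigned OP_SEL_0 = 1u << 2;
  const unsigned OP_SEL_1 = 1u << 3;
  const unsigned NEG      = 1u << 0;
  const unsigned NEG_HI   = 1u << 1;
}

// For source J, pick bit J out of each parsed mask and fold the matching
// flags into that source's *_modifiers immediate.
unsigned packSrcMods(unsigned J, unsigned OpSel, unsigned OpSelHi,
                     unsigned NegLo, unsigned NegHi) {
  unsigned ModVal = 0;
  if (OpSel   & (1u << J)) ModVal |= Flags::OP_SEL_0;
  if (OpSelHi & (1u << J)) ModVal |= Flags::OP_SEL_1;
  if (NegLo   & (1u << J)) ModVal |= Flags::NEG;
  if (NegHi   & (1u << J)) ModVal |= Flags::NEG_HI;
  return ModVal;
}

int main() {
  // op_sel:[0,1,1] op_sel_hi:[1,1,1] neg_lo:[1,0,0] neg_hi:[0,0,0]
  for (unsigned J = 0; J < 3; ++J)
    std::printf("src%u_modifiers = 0x%x\n", J,
                packSrcMods(J, 0x6, 0x7, 0x1, 0x0));
  return 0;
}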
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index 45a7fe6d3439..a6609f0725ab 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -21,8 +21,8 @@ def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset"
class MubufLoad <SDPatternOperator op> : PatFrag <
(ops node:$ptr), (op node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ return AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
def mubuf_load : MubufLoad <load>;
@@ -705,12 +705,6 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
let Predicates = [isGCN] in {
-// int_SI_vs_load_input
-def : Pat<
- (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, (i32 0), imm:$attr_offset, 0, 0, 0)
->;
-
// Offset in an 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 02d441756c85..7c0ef4aeac3c 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -12,11 +12,17 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
+if(LLVM_BUILD_GLOBAL_ISEL)
+ tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank)
+endif()
add_public_tablegen_target(AMDGPUCommonTableGen)
# List of all GlobalISel files.
set(GLOBAL_ISEL_FILES
AMDGPUCallLowering.cpp
+ AMDGPUInstructionSelector.cpp
+ AMDGPULegalizerInfo.cpp
+ AMDGPURegisterBankInfo.cpp
)
# Add GlobalISel files to the dependencies if the user wants to build it.
@@ -30,6 +36,7 @@ endif()
add_llvm_target(AMDGPUCodeGen
AMDILCFGStructurizer.cpp
+ AMDGPUAliasAnalysis.cpp
AMDGPUAlwaysInlinePass.cpp
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAnnotateUniformValues.cpp
@@ -39,6 +46,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUTargetObjectFile.cpp
AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
+ AMDGPULowerIntrinsics.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
AMDGPUUnifyMetadata.cpp
@@ -50,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUInstrInfo.cpp
AMDGPUPromoteAlloca.cpp
AMDGPURegisterInfo.cpp
+ AMDGPUUnifyDivergentExitNodes.cpp
GCNHazardRecognizer.cpp
GCNSchedStrategy.cpp
R600ClauseMergePass.cpp
@@ -68,10 +77,12 @@ add_llvm_target(AMDGPUCodeGen
SIDebuggerInsertNops.cpp
SIFixControlFlowLiveIntervals.cpp
SIFixSGPRCopies.cpp
+ SIFixVGPRCopies.cpp
SIFoldOperands.cpp
SIFrameLowering.cpp
SIInsertSkips.cpp
SIInsertWaits.cpp
+ SIInsertWaitcnts.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
SILoadStoreOptimizer.cpp
@@ -80,10 +91,14 @@ add_llvm_target(AMDGPUCodeGen
SIMachineFunctionInfo.cpp
SIMachineScheduler.cpp
SIOptimizeExecMasking.cpp
+ SIPeepholeSDWA.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SITypeRewriter.cpp
SIWholeQuadMode.cpp
+ GCNIterativeScheduler.cpp
+ GCNMinRegStrategy.cpp
+ GCNRegPressure.cpp
${GLOBAL_ISEL_BUILD_FILES}
)
diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td
index a077001df6bd..a9f64589fa5e 100644
--- a/lib/Target/AMDGPU/DSInstructions.td
+++ b/lib/Target/AMDGPU/DSInstructions.td
@@ -88,18 +88,6 @@ class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
let has_vdst = 0;
}
-class DS_1A_Off8_NORET<string opName> : DS_Pseudo<opName,
- (outs),
- (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
- "$addr $offset0$offset1$gds"> {
-
- let has_data0 = 0;
- let has_data1 = 0;
- let has_vdst = 0;
- let has_offset = 0;
- let AsmMatchConverter = "cvtDSOffset01";
-}
-
class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
@@ -143,6 +131,20 @@ class DS_1A2D_RET<string opName,
let hasPostISelHook = 1;
}
+class DS_1A2D_Off8_RET<string opName,
+ RegisterClass rc = VGPR_32,
+ RegisterClass src = rc>
+: DS_Pseudo<opName,
+ (outs rc:$vdst),
+ (ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds),
+ "$vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
+
+ let has_offset = 0;
+ let AsmMatchConverter = "cvtDSOffset01";
+
+ let hasPostISelHook = 1;
+}
+
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs rc:$vdst),
@@ -174,6 +176,7 @@ class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
let has_data1 = 0;
let has_gds = 0;
let gdsValue = 1;
+ let AsmMatchConverter = "cvtDSGds";
}
class DS_0A_RET <string opName> : DS_Pseudo<opName,
@@ -202,20 +205,46 @@ class DS_1A <string opName> : DS_Pseudo<opName,
let has_data1 = 0;
}
-class DS_1A_GDS <string opName> : DS_Pseudo<opName,
- (outs),
- (ins VGPR_32:$addr),
- "$addr gds"> {
+class DS_GWS <string opName, dag ins, string asmOps>
+: DS_Pseudo<opName, (outs), ins, asmOps> {
+
+ let has_vdst = 0;
+ let has_addr = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+
+ let has_gds = 0;
+ let gdsValue = 1;
+ let AsmMatchConverter = "cvtDSGds";
+}
+
+class DS_GWS_0D <string opName>
+: DS_GWS<opName,
+ (ins offset:$offset, gds:$gds), "$offset gds">;
- let has_vdst = 0;
- let has_data0 = 0;
- let has_data1 = 0;
- let has_offset = 0;
+class DS_GWS_1D <string opName>
+: DS_GWS<opName,
+ (ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
+
+ let has_data0 = 1;
+}
+
+class DS_VOID <string opName> : DS_Pseudo<opName,
+ (outs), (ins), ""> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 1;
+ let UseNamedOperandTable = 0;
+ let AsmMatchConverter = "";
+
+ let has_vdst = 0;
+ let has_addr = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+ let has_offset = 0;
let has_offset0 = 0;
let has_offset1 = 0;
-
- let has_gds = 0;
- let gdsValue = 1;
+ let has_gds = 0;
}
class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
@@ -226,6 +255,8 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
[(set i32:$vdst,
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
+ let LGKM_CNT = 0;
+
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
@@ -324,9 +355,9 @@ def DS_MAX_RTN_F32 : DS_1A1D_RET <"ds_max_rtn_f32">,
def DS_WRXCHG_RTN_B32 : DS_1A1D_RET<"ds_wrxchg_rtn_b32">,
AtomicNoRet<"", 1>;
-def DS_WRXCHG2_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
+def DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;
-def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
+def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;
def DS_ADD_RTN_U64 : DS_1A1D_RET<"ds_add_rtn_u64", VReg_64>,
@@ -365,17 +396,17 @@ def DS_MAX_RTN_F64 : DS_1A1D_RET<"ds_max_rtn_f64", VReg_64>,
AtomicNoRet<"ds_max_f64", 1>;
def DS_WRXCHG_RTN_B64 : DS_1A1D_RET<"ds_wrxchg_rtn_b64", VReg_64>,
- AtomicNoRet<"ds_wrxchg_b64", 1>;
-def DS_WRXCHG2_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
- AtomicNoRet<"ds_wrxchg2_b64", 1>;
-def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
- AtomicNoRet<"ds_wrxchg2st64_b64", 1>;
-
-def DS_GWS_INIT : DS_1A_GDS<"ds_gws_init">;
-def DS_GWS_SEMA_V : DS_1A_GDS<"ds_gws_sema_v">;
-def DS_GWS_SEMA_BR : DS_1A_GDS<"ds_gws_sema_br">;
-def DS_GWS_SEMA_P : DS_1A_GDS<"ds_gws_sema_p">;
-def DS_GWS_BARRIER : DS_1A_GDS<"ds_gws_barrier">;
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"", 1>;
+
+def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
+def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
+def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
+def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
+def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
@@ -386,7 +417,7 @@ def DS_MIN_SRC2_I32 : DS_1A<"ds_min_src2_i32">;
def DS_MAX_SRC2_I32 : DS_1A<"ds_max_src2_i32">;
def DS_MIN_SRC2_U32 : DS_1A<"ds_min_src2_u32">;
def DS_MAX_SRC2_U32 : DS_1A<"ds_max_src2_u32">;
-def DS_AND_SRC2_B32 : DS_1A<"ds_and_src_b32">;
+def DS_AND_SRC2_B32 : DS_1A<"ds_and_src2_b32">;
def DS_OR_SRC2_B32 : DS_1A<"ds_or_src2_b32">;
def DS_XOR_SRC2_B32 : DS_1A<"ds_xor_src2_b32">;
def DS_MIN_SRC2_F32 : DS_1A<"ds_min_src2_f32">;
@@ -407,8 +438,8 @@ def DS_XOR_SRC2_B64 : DS_1A<"ds_xor_src2_b64">;
def DS_MIN_SRC2_F64 : DS_1A<"ds_min_src2_f64">;
def DS_MAX_SRC2_F64 : DS_1A<"ds_max_src2_f64">;
-def DS_WRITE_SRC2_B32 : DS_1A_Off8_NORET<"ds_write_src2_b32">;
-def DS_WRITE_SRC2_B64 : DS_1A_Off8_NORET<"ds_write_src2_b64">;
+def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
+def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">;
@@ -429,30 +460,34 @@ def DS_READ2_B64 : DS_1A_Off8_RET<"ds_read2_b64", VReg_128>;
def DS_READ2ST64_B64 : DS_1A_Off8_RET<"ds_read2st64_b64", VReg_128>;
}
-let SubtargetPredicate = isSICI in {
def DS_CONSUME : DS_0A_RET<"ds_consume">;
def DS_APPEND : DS_0A_RET<"ds_append">;
def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
-}
//===----------------------------------------------------------------------===//
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
-// Remaining instructions:
-// DS_NOP
-// DS_GWS_SEMA_RELEASE_ALL
-// DS_WRAP_RTN_B32
-// DS_CNDXCHG32_RTN_B64
-// DS_WRITE_B96
-// DS_WRITE_B128
-// DS_CONDXCHG32_RTN_B128
-// DS_READ_B96
-// DS_READ_B128
let SubtargetPredicate = isCIVI in {
-def DS_WRAP_RTN_F32 : DS_1A1D_RET <"ds_wrap_rtn_f32">,
- AtomicNoRet<"ds_wrap_f32", 1>;
+def DS_WRAP_RTN_B32 : DS_1A2D_RET<"ds_wrap_rtn_b32">, AtomicNoRet<"", 1>;
+
+def DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET<"ds_condxchg32_rtn_b64", VReg_64>,
+ AtomicNoRet<"", 1>;
+
+def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
+
+let mayStore = 0 in {
+def DS_READ_B96 : DS_1A_RET<"ds_read_b96", VReg_96>;
+def DS_READ_B128: DS_1A_RET<"ds_read_b128", VReg_128>;
+} // End mayStore = 0
+
+let mayLoad = 0 in {
+def DS_WRITE_B96 : DS_1A1D_NORET<"ds_write_b96", VReg_96>;
+def DS_WRITE_B128 : DS_1A1D_NORET<"ds_write_b128", VReg_128>;
+} // End mayLoad = 0
+
+def DS_NOP : DS_VOID<"ds_nop">;
} // let SubtargetPredicate = isCIVI
@@ -623,6 +658,7 @@ def DS_CMPST_B32_si : DS_Real_si<0x10, DS_CMPST_B32>;
def DS_CMPST_F32_si : DS_Real_si<0x11, DS_CMPST_F32>;
def DS_MIN_F32_si : DS_Real_si<0x12, DS_MIN_F32>;
def DS_MAX_F32_si : DS_Real_si<0x13, DS_MAX_F32>;
+def DS_NOP_si : DS_Real_si<0x14, DS_NOP>;
def DS_GWS_INIT_si : DS_Real_si<0x19, DS_GWS_INIT>;
def DS_GWS_SEMA_V_si : DS_Real_si<0x1a, DS_GWS_SEMA_V>;
def DS_GWS_SEMA_BR_si : DS_Real_si<0x1b, DS_GWS_SEMA_BR>;
@@ -651,8 +687,10 @@ def DS_CMPST_RTN_F32_si : DS_Real_si<0x31, DS_CMPST_RTN_F32>;
def DS_MIN_RTN_F32_si : DS_Real_si<0x32, DS_MIN_RTN_F32>;
def DS_MAX_RTN_F32_si : DS_Real_si<0x33, DS_MAX_RTN_F32>;
-// FIXME: this instruction is actually CI/VI
-def DS_WRAP_RTN_F32_si : DS_Real_si<0x34, DS_WRAP_RTN_F32>;
+// These instructions are CI/VI only
+def DS_WRAP_RTN_B32_si : DS_Real_si<0x34, DS_WRAP_RTN_B32>;
+def DS_CONDXCHG32_RTN_B64_si : DS_Real_si<0x7e, DS_CONDXCHG32_RTN_B64>;
+def DS_GWS_SEMA_RELEASE_ALL_si : DS_Real_si<0x18, DS_GWS_SEMA_RELEASE_ALL>;
def DS_SWIZZLE_B32_si : DS_Real_si<0x35, DS_SWIZZLE_B32>;
def DS_READ_B32_si : DS_Real_si<0x36, DS_READ_B32>;
@@ -744,6 +782,10 @@ def DS_WRITE_SRC2_B64_si : DS_Real_si<0xcd, DS_WRITE_SRC2_B64>;
def DS_MIN_SRC2_F64_si : DS_Real_si<0xd2, DS_MIN_SRC2_F64>;
def DS_MAX_SRC2_F64_si : DS_Real_si<0xd3, DS_MAX_SRC2_F64>;
+def DS_WRITE_B96_si : DS_Real_si<0xde, DS_WRITE_B96>;
+def DS_WRITE_B128_si : DS_Real_si<0xdf, DS_WRITE_B128>;
+def DS_READ_B96_si : DS_Real_si<0xfe, DS_READ_B96>;
+def DS_READ_B128_si : DS_Real_si<0xff, DS_READ_B128>;
//===----------------------------------------------------------------------===//
// VIInstructions.td
@@ -787,12 +829,13 @@ def DS_CMPST_B32_vi : DS_Real_vi<0x10, DS_CMPST_B32>;
def DS_CMPST_F32_vi : DS_Real_vi<0x11, DS_CMPST_F32>;
def DS_MIN_F32_vi : DS_Real_vi<0x12, DS_MIN_F32>;
def DS_MAX_F32_vi : DS_Real_vi<0x13, DS_MAX_F32>;
+def DS_NOP_vi : DS_Real_vi<0x14, DS_NOP>;
def DS_ADD_F32_vi : DS_Real_vi<0x15, DS_ADD_F32>;
-def DS_GWS_INIT_vi : DS_Real_vi<0x19, DS_GWS_INIT>;
-def DS_GWS_SEMA_V_vi : DS_Real_vi<0x1a, DS_GWS_SEMA_V>;
-def DS_GWS_SEMA_BR_vi : DS_Real_vi<0x1b, DS_GWS_SEMA_BR>;
-def DS_GWS_SEMA_P_vi : DS_Real_vi<0x1c, DS_GWS_SEMA_P>;
-def DS_GWS_BARRIER_vi : DS_Real_vi<0x1d, DS_GWS_BARRIER>;
+def DS_GWS_INIT_vi : DS_Real_vi<0x99, DS_GWS_INIT>;
+def DS_GWS_SEMA_V_vi : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
+def DS_GWS_SEMA_BR_vi : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
+def DS_GWS_SEMA_P_vi : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
+def DS_GWS_BARRIER_vi : DS_Real_vi<0x9d, DS_GWS_BARRIER>;
def DS_WRITE_B8_vi : DS_Real_vi<0x1e, DS_WRITE_B8>;
def DS_WRITE_B16_vi : DS_Real_vi<0x1f, DS_WRITE_B16>;
def DS_ADD_RTN_U32_vi : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
@@ -815,7 +858,7 @@ def DS_CMPST_RTN_B32_vi : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
def DS_CMPST_RTN_F32_vi : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
def DS_MIN_RTN_F32_vi : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
def DS_MAX_RTN_F32_vi : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
-def DS_WRAP_RTN_F32_vi : DS_Real_vi<0x34, DS_WRAP_RTN_F32>;
+def DS_WRAP_RTN_B32_vi : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
def DS_ADD_RTN_F32_vi : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
def DS_READ_B32_vi : DS_Real_vi<0x36, DS_READ_B32>;
def DS_READ2_B32_vi : DS_Real_vi<0x37, DS_READ2_B32>;
@@ -824,6 +867,9 @@ def DS_READ_I8_vi : DS_Real_vi<0x39, DS_READ_I8>;
def DS_READ_U8_vi : DS_Real_vi<0x3a, DS_READ_U8>;
def DS_READ_I16_vi : DS_Real_vi<0x3b, DS_READ_I16>;
def DS_READ_U16_vi : DS_Real_vi<0x3c, DS_READ_U16>;
+def DS_CONSUME_vi : DS_Real_vi<0xbd, DS_CONSUME>;
+def DS_APPEND_vi : DS_Real_vi<0xbe, DS_APPEND>;
+def DS_ORDERED_COUNT_vi : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;
def DS_SWIZZLE_B32_vi : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
def DS_PERMUTE_B32_vi : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
def DS_BPERMUTE_B32_vi : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;
@@ -865,6 +911,8 @@ def DS_MSKOR_RTN_B64_vi : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
def DS_WRXCHG_RTN_B64_vi : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
def DS_WRXCHG2_RTN_B64_vi : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
def DS_WRXCHG2ST64_RTN_B64_vi : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
+def DS_CONDXCHG32_RTN_B64_vi : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
+def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;
def DS_CMPST_RTN_B64_vi : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
def DS_CMPST_RTN_F64_vi : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
def DS_MIN_RTN_F64_vi : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
@@ -904,3 +952,7 @@ def DS_XOR_SRC2_B64_vi : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
def DS_WRITE_SRC2_B64_vi : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
def DS_MIN_SRC2_F64_vi : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
def DS_MAX_SRC2_F64_vi : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
+def DS_WRITE_B96_vi : DS_Real_vi<0xde, DS_WRITE_B96>;
+def DS_WRITE_B128_vi : DS_Real_vi<0xdf, DS_WRITE_B128>;
+def DS_READ_B96_vi : DS_Real_vi<0xfe, DS_READ_B96>;
+def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>;
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 2247cad7bb51..4fb03b62bba9 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -22,6 +22,7 @@
#include "AMDGPURegisterInfo.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -97,9 +98,13 @@ static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}
-#define GET_SUBTARGETINFO_ENUM
-#include "AMDGPUGenSubtargetInfo.inc"
-#undef GET_SUBTARGETINFO_ENUM
+static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
+}
#include "AMDGPUGenDisassemblerTables.inc"
@@ -138,7 +143,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
CommentStream = &CS;
// ToDo: AMDGPUDisassembler supports only VI ISA.
- assert(AMDGPU::isVI(STI) && "Can disassemble only VI ISA.");
+ if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
+ report_fatal_error("Disassembly not yet supported for subtarget");
const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -179,6 +185,17 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
} while (false);
+ if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
+ MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi)) {
+ // Insert dummy unused src2_modifiers.
+ int Src2ModIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src2_modifiers);
+ auto I = MI.begin();
+ std::advance(I, Src2ModIdx);
+ MI.insert(I, MCOperand::createImm(0));
+ }
+
Size = Res ? (MaxInstBytesNum - Bytes.size()) : 0;
return Res;
}
@@ -263,6 +280,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
return decodeSrcOp(OPW16, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
+ return decodeSrcOp(OPWV216, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
// Some instructions have operand restrictions beyond what the encoding
// allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
@@ -423,6 +444,7 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
case OPW64:
return MCOperand::createImm(getInlineImmVal64(Imm));
case OPW16:
+ case OPWV216:
return MCOperand::createImm(getInlineImmVal16(Imm));
default:
llvm_unreachable("implement me");
@@ -436,6 +458,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return VGPR_32RegClassID;
case OPW64: return VReg_64RegClassID;
case OPW128: return VReg_128RegClassID;
@@ -449,6 +472,7 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return SGPR_32RegClassID;
case OPW64: return SGPR_64RegClassID;
case OPW128: return SGPR_128RegClassID;
@@ -462,6 +486,7 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return TTMP_32RegClassID;
case OPW64: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
@@ -497,6 +522,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
switch (Width) {
case OPW32:
case OPW16:
+ case OPWV216:
return decodeSpecialReg32(Val);
case OPW64:
return decodeSpecialReg64(Val);
@@ -522,6 +548,11 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
case 124: return createRegOperand(M0);
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
+ case 235: return createRegOperand(SRC_SHARED_BASE);
+ case 236: return createRegOperand(SRC_SHARED_LIMIT);
+ case 237: return createRegOperand(SRC_PRIVATE_BASE);
+ case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
+ // TODO: SRC_POPS_EXITING_WAVE_ID
// ToDo: no support for vccz register
case 251: break;
// ToDo: no support for execz register
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index ee5883a984e0..d50665187e10 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -67,6 +67,7 @@ public:
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
MCOperand decodeOperand_VSrc16(unsigned Val) const;
+ MCOperand decodeOperand_VSrcV216(unsigned Val) const;
MCOperand decodeOperand_VReg_64(unsigned Val) const;
MCOperand decodeOperand_VReg_96(unsigned Val) const;
@@ -85,6 +86,7 @@ public:
OPW64,
OPW128,
OPW16,
+ OPWV216,
OPW_LAST_,
OPW_FIRST_ = OPW32
};
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 48c6592ca5b2..5480110d8315 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -35,28 +35,59 @@ class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag
: EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
"MEM_RAT_CACHELESS "#name, pattern>;
-class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
- list<dag> pattern>
- : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
+class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
+ dag outs, string name, list<dag> pattern>
+ : EG_CF_RAT <0x56, rat_inst, rat_id, mask, outs, ins,
"MEM_RAT "#name, pattern>;
class CF_MEM_RAT_STORE_TYPED<bits<1> has_eop>
- : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
- i32imm:$rat_id, InstFlag:$eop),
+ : CF_MEM_RAT <0x1, ?, 0xf, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
+ i32imm:$rat_id, InstFlag:$eop), (outs),
"STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr"
#!if(has_eop, ", $eop", ""),
[(int_r600_rat_store_typed R600_Reg128:$rw_gpr,
R600_Reg128:$index_gpr,
(i32 imm:$rat_id))]>;
-def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
- (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+def RAT_MSKOR : CF_MEM_RAT <0x11, 0, 0xf,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), (outs),
"MSKOR $rw_gpr.XW, $index_gpr",
[(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
> {
let eop = 0;
}
+
+multiclass RAT_ATOMIC<bits<6> op_ret, bits<6> op_noret, string name> {
+ let Constraints = "$rw_gpr = $out_gpr", eop = 0, mayStore = 1 in {
+ def _RTN: CF_MEM_RAT <op_ret, 0, 0xf,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+ (outs R600_Reg128:$out_gpr),
+ name ## "_RTN" ## " $rw_gpr, $index_gpr", [] >;
+ def _NORET: CF_MEM_RAT <op_noret, 0, 0xf,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+ (outs R600_Reg128:$out_gpr),
+ name ## " $rw_gpr, $index_gpr", [] >;
+ }
+}
+
+// Swap no-ret is just a store. A raw store to a cached target
+// can only store a dword, which exactly matches swap_no_ret.
+defm RAT_ATOMIC_XCHG_INT : RAT_ATOMIC<1, 34, "ATOMIC_XCHG_INT">;
+defm RAT_ATOMIC_CMPXCHG_INT : RAT_ATOMIC<4, 36, "ATOMIC_CMPXCHG_INT">;
+defm RAT_ATOMIC_ADD : RAT_ATOMIC<7, 39, "ATOMIC_ADD">;
+defm RAT_ATOMIC_SUB : RAT_ATOMIC<8, 40, "ATOMIC_SUB">;
+defm RAT_ATOMIC_RSUB : RAT_ATOMIC<9, 41, "ATOMIC_RSUB">;
+defm RAT_ATOMIC_MIN_INT : RAT_ATOMIC<10, 42, "ATOMIC_MIN_INT">;
+defm RAT_ATOMIC_MIN_UINT : RAT_ATOMIC<11, 43, "ATOMIC_MIN_UINT">;
+defm RAT_ATOMIC_MAX_INT : RAT_ATOMIC<12, 44, "ATOMIC_MAX_INT">;
+defm RAT_ATOMIC_MAX_UINT : RAT_ATOMIC<13, 45, "ATOMIC_MAX_UINT">;
+defm RAT_ATOMIC_AND : RAT_ATOMIC<14, 46, "ATOMIC_AND">;
+defm RAT_ATOMIC_OR : RAT_ATOMIC<15, 47, "ATOMIC_OR">;
+defm RAT_ATOMIC_XOR : RAT_ATOMIC<16, 48, "ATOMIC_XOR">;
+defm RAT_ATOMIC_INC_UINT : RAT_ATOMIC<18, 50, "ATOMIC_INC_UINT">;
+defm RAT_ATOMIC_DEC_UINT : RAT_ATOMIC<19, 51, "ATOMIC_DEC_UINT">;
+
} // End let Predicates = [isEGorCayman]
//===----------------------------------------------------------------------===//
@@ -257,6 +288,76 @@ def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
let Predicates = [isEGorCayman] in {
+multiclass AtomicPat<Instruction inst_ret, Instruction inst_noret,
+ SDPatternOperator node_ret, SDPatternOperator node_noret> {
+ // FIXME: Add _RTN version. We need per WI scratch location to store the old value
+ // EXTRACT_SUBREG here is dummy, we know the node has no uses
+ def : Pat<(i32 (node_noret i32:$ptr, i32:$data)),
+ (EXTRACT_SUBREG (inst_noret
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>;
+}
+multiclass AtomicIncDecPat<Instruction inst_ret, Instruction inst_noret,
+ SDPatternOperator node_ret, SDPatternOperator node_noret, int C> {
+ // FIXME: Add _RTN version. We need per WI scratch location to store the old value
+ // EXTRACT_SUBREG here is dummy, we know the node has no uses
+ def : Pat<(i32 (node_noret i32:$ptr, C)),
+ (EXTRACT_SUBREG (inst_noret
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>;
+}
+
+// The CMPSWAP pattern is special
+// EXTRACT_SUBREG here is dummy, we know the node has no uses
+// FIXME: Add _RTN version. We need per WI scratch location to store the old value
+def : Pat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$data)),
+ (EXTRACT_SUBREG (RAT_ATOMIC_CMPXCHG_INT_NORET
+ (INSERT_SUBREG
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $cmp, sub3),
+ $data, sub0),
+ $ptr), sub1)>;
+
+defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN,
+ RAT_ATOMIC_XCHG_INT_NORET,
+ atomic_swap_global_ret,
+ atomic_swap_global_noret>;
+defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET,
+ atomic_add_global_ret, atomic_add_global_noret>;
+defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET,
+ atomic_sub_global_ret, atomic_sub_global_noret>;
+defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN,
+ RAT_ATOMIC_MIN_INT_NORET,
+ atomic_min_global_ret, atomic_min_global_noret>;
+defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN,
+ RAT_ATOMIC_MIN_UINT_NORET,
+ atomic_umin_global_ret, atomic_umin_global_noret>;
+defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN,
+ RAT_ATOMIC_MAX_INT_NORET,
+ atomic_max_global_ret, atomic_max_global_noret>;
+defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN,
+ RAT_ATOMIC_MAX_UINT_NORET,
+ atomic_umax_global_ret, atomic_umax_global_noret>;
+defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET,
+ atomic_and_global_ret, atomic_and_global_noret>;
+defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET,
+ atomic_or_global_ret, atomic_or_global_noret>;
+defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET,
+ atomic_xor_global_ret, atomic_xor_global_noret>;
+defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
+ RAT_ATOMIC_INC_UINT_NORET,
+ atomic_add_global_ret,
+ atomic_add_global_noret, 1>;
+defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
+ RAT_ATOMIC_INC_UINT_NORET,
+ atomic_sub_global_ret,
+ atomic_sub_global_noret, -1>;
+defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
+ RAT_ATOMIC_DEC_UINT_NORET,
+ atomic_add_global_ret,
+ atomic_add_global_noret, -1>;
+defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
+ RAT_ATOMIC_DEC_UINT_NORET,
+ atomic_sub_global_ret,
+ atomic_sub_global_noret, 1>;
+
// Should be predicated on FeatureFP64
// def FMA_64 : R600_3OP <
// 0xA, "FMA_64",
@@ -287,7 +388,7 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
@@ -337,7 +438,7 @@ defm CUBE_eg : CUBE_Common<0xC0>;
def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
-def FLT32_TO_FLT16 : R600_1OP_Helper <0xA2, "FLT32_TO_FLT16", fp_to_f16, VecALU>;
+def FLT32_TO_FLT16 : R600_1OP_Helper <0xA2, "FLT32_TO_FLT16", AMDGPUfp_to_f16, VecALU>;
def FLT16_TO_FLT32 : R600_1OP_Helper <0xA3, "FLT16_TO_FLT32", f16_to_fp, VecALU>;
def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>;
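The AtomicIncDecPat patterns above lower add/sub by one to the RAT_ATOMIC_INC_UINT/DEC_UINT instructions with a data value of -1. The reason that works, assuming the usual GPU uint atomic inc/dec semantics (an assumption here, not stated in this patch), is that a compare value of 0xffffffff collapses inc/dec into plain wrap-around +1/-1:

#include <cassert>
#include <cstdint>

// Assumed semantics:
//   inc: dst = (old >= cmp) ? 0 : old + 1
//   dec: dst = (old == 0 || old > cmp) ? cmp : old - 1
uint32_t rat_inc_uint(uint32_t Old, uint32_t Cmp) {
  return Old >= Cmp ? 0 : Old + 1;
}
uint32_t rat_dec_uint(uint32_t Old, uint32_t Cmp) {
  return (Old == 0 || Old > Cmp) ? Cmp : Old - 1;
}

int main() {
  // With cmp = 0xffffffff (MOV_IMM_I32 -1) inc/dec behave exactly like
  // +1/-1 modulo 2^32, which is why the patterns can reuse them for
  // atomic add/sub by one.
  for (uint32_t V : {0u, 1u, 42u, 0xfffffffeu}) {
    assert(rat_inc_uint(V, 0xffffffffu) == V + 1);
    assert(rat_dec_uint(V, 0xffffffffu) == V - 1);
  }
  return 0;
}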
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td
index 849fb8ad50f5..b0ac0e689a0b 100644
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -136,7 +136,7 @@ multiclass FLAT_Atomic_Pseudo<
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
>;
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
@@ -284,16 +284,16 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ return AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS;
+ return AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
def atomic_flat_load : flat_ld <atomic_load>;
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd3b46f13921..80fc4ac9d2a3 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,11 +11,24 @@
//
//===----------------------------------------------------------------------===//
-#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
+#include "GCNHazardRecognizer.h"
+#include "SIDefines.h"
#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <set>
+#include <vector>
using namespace llvm;
@@ -26,7 +39,8 @@ using namespace llvm;
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
CurrCycleInstr(nullptr),
MF(MF),
- ST(MF.getSubtarget<SISubtarget>()) {
+ ST(MF.getSubtarget<SISubtarget>()),
+ TII(*ST.getInstrInfo()) {
MaxLookAhead = 5;
}
@@ -58,8 +72,19 @@ static bool isRFE(unsigned Opcode) {
return Opcode == AMDGPU::S_RFE_B64;
}
-static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
+static bool isSMovRel(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::S_MOVRELS_B32:
+ case AMDGPU::S_MOVRELS_B64:
+ case AMDGPU::S_MOVRELD_B32:
+ case AMDGPU::S_MOVRELD_B64:
+ return true;
+ default:
+ return false;
+ }
+}
+static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
@@ -96,6 +121,13 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return NoopHazard;
+ if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+ checkReadM0Hazards(MI) > 0)
+ return NoopHazard;
+
+ if (checkAnyInstHazards(MI) > 0)
+ return NoopHazard;
+
return NoHazard;
}
@@ -104,11 +136,13 @@ unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
}
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ int WaitStates = std::max(0, checkAnyInstHazards(MI));
+
if (SIInstrInfo::isSMRD(*MI))
- return std::max(0, checkSMRDHazards(MI));
+ return std::max(WaitStates, checkSMRDHazards(MI));
if (SIInstrInfo::isVALU(*MI)) {
- int WaitStates = std::max(0, checkVALUHazards(MI));
+ WaitStates = std::max(WaitStates, checkVALUHazards(MI));
if (SIInstrInfo::isVMEM(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
@@ -122,19 +156,25 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (isRWLane(MI->getOpcode()))
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
+ if (TII.isVINTRP(*MI))
+ WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
+
return WaitStates;
}
if (isSGetReg(MI->getOpcode()))
- return std::max(0, checkGetRegHazards(MI));
+ return std::max(WaitStates, checkGetRegHazards(MI));
if (isSSetReg(MI->getOpcode()))
- return std::max(0, checkSetRegHazards(MI));
+ return std::max(WaitStates, checkSetRegHazards(MI));
if (isRFE(MI->getOpcode()))
- return std::max(0, checkRFEHazards(MI));
+ return std::max(WaitStates, checkRFEHazards(MI));
- return 0;
+ if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))
+ return std::max(WaitStates, checkReadM0Hazards(MI));
+
+ return WaitStates;
}
void GCNHazardRecognizer::EmitNoop() {
@@ -142,14 +182,12 @@ void GCNHazardRecognizer::EmitNoop() {
}
void GCNHazardRecognizer::AdvanceCycle() {
-
// When the scheduler detects a stall, it will call AdvanceCycle() without
// emitting any instructions.
if (!CurrCycleInstr)
return;
- const SIInstrInfo *TII = ST.getInstrInfo();
- unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
+ unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
EmittedInstrs.push_front(CurrCycleInstr);
@@ -180,7 +218,6 @@ void GCNHazardRecognizer::RecedeCycle() {
int GCNHazardRecognizer::getWaitStatesSince(
function_ref<bool(MachineInstr *)> IsHazard) {
-
int WaitStates = -1;
for (MachineInstr *MI : EmittedInstrs) {
++WaitStates;
@@ -204,7 +241,6 @@ int GCNHazardRecognizer::getWaitStatesSinceDef(
int GCNHazardRecognizer::getWaitStatesSinceSetReg(
function_ref<bool(MachineInstr *)> IsHazard) {
-
auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
return isSSetReg(MI->getOpcode()) && IsHazard(MI);
};
@@ -281,7 +317,6 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
int WaitStatesNeeded = 0;
WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
@@ -293,7 +328,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
// A read of an SGPR by SMRD instruction requires 4 wait states when the
// SGPR was written by a VALU instruction.
int SmrdSgprWaitStates = 4;
- auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+ auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
for (const MachineOperand &Use : SMRD->uses()) {
if (!Use.isReg())
@@ -486,7 +521,6 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
}
int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
-
if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 0;
@@ -500,3 +534,42 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
return RFEWaitStates - WaitStatesNeeded;
}
+
+int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return 0;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ if (!ST.hasSMovFedHazard())
+ return 0;
+
+ // Check for any instruction reading an SGPR after a write from
+ // s_mov_fed_b32.
+ int MovFedWaitStates = 1;
+ int WaitStatesNeeded = 0;
+
+ for (const MachineOperand &Use : MI->uses()) {
+ if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
+ continue;
+ auto IsHazardFn = [] (MachineInstr *MI) {
+ return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
+ };
+ int WaitStatesNeededForUse =
+ MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
+ if (!ST.hasReadM0Hazard())
+ return 0;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ int SMovRelWaitStates = 1;
+ auto IsHazardFn = [TII] (MachineInstr *MI) {
+ return TII->isSALU(*MI);
+ };
+ return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
+}
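The new checkAnyInstHazards and checkReadM0Hazards follow the recognizer's usual pattern: a fixed wait-state requirement minus the wait states that have already elapsed since the hazardous write, where a non-positive result means no noops are needed. A rough stand-alone sketch of that bookkeeping follows; it is not the actual GCNHazardRecognizer API, and the names are illustrative only.

#include <algorithm>
#include <climits>
#include <deque>
#include <functional>

// One entry per already-issued wait state; multi-cycle padding is elided.
struct Inst { unsigned Opcode; };

// Count wait states elapsed since the most recent instruction matching
// IsHazard; Emitted.front() is the most recently issued instruction.
int waitStatesSince(const std::deque<Inst> &Emitted,
                    const std::function<bool(const Inst &)> &IsHazard) {
  int Elapsed = -1;
  for (const Inst &I : Emitted) {
    ++Elapsed;
    if (IsHazard(I))
      return Elapsed;
  }
  return INT_MAX;   // no hazard-producing instruction in the tracked window
}

// Noops the recognizer has to emit: required latency minus what has already
// elapsed, clamped at zero.
int noopsNeeded(int RequiredWaitStates, int Elapsed) {
  return std::max(0, RequiredWaitStates - Elapsed);
}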
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 0ab82ff4635b..5680c3de6a1a 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -34,6 +34,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
std::list<MachineInstr*> EmittedInstrs;
const MachineFunction &MF;
const SISubtarget &ST;
+ const SIInstrInfo &TII;
int getWaitStatesSince(function_ref<bool(MachineInstr *)> IsHazard);
int getWaitStatesSinceDef(unsigned Reg,
@@ -52,6 +53,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkVALUHazards(MachineInstr *VALU);
int checkRWLaneHazards(MachineInstr *RWLane);
int checkRFEHazards(MachineInstr *RFE);
+ int checkAnyInstHazards(MachineInstr *MI);
+ int checkReadM0Hazards(MachineInstr *SMovRel);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
new file mode 100644
index 000000000000..3bb5c9bc22b7
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -0,0 +1,528 @@
+//===--------------------- GCNIterativeScheduler.cpp - --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNIterativeScheduler.h"
+#include "GCNSchedStrategy.h"
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+namespace llvm {
+ std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG);
+}
+
+// shim accessors for different order containers
+static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
+ return MI;
+}
+static inline MachineInstr *getMachineInstr(const SUnit *SU) {
+ return SU->getInstr();
+}
+static inline MachineInstr *getMachineInstr(const SUnit &SU) {
+ return SU.getInstr();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+static void printRegion(raw_ostream &OS,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ const LiveIntervals *LIS,
+ unsigned MaxInstNum =
+ std::numeric_limits<unsigned>::max()) {
+ auto BB = Begin->getParent();
+ OS << BB->getParent()->getName() << ":BB#" << BB->getNumber()
+ << ' ' << BB->getName() << ":\n";
+ auto I = Begin;
+ MaxInstNum = std::max(MaxInstNum, 1u);
+ for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
+ if (!I->isDebugValue() && LIS)
+ OS << LIS->getInstructionIndex(*I);
+ OS << '\t' << *I;
+ }
+ if (I != End) {
+ OS << "\t...\n";
+ I = std::prev(End);
+ if (!I->isDebugValue() && LIS)
+ OS << LIS->getInstructionIndex(*I);
+ OS << '\t' << *I;
+ }
+ if (End != BB->end()) { // print boundary inst if present
+ OS << "----\n";
+ if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
+ OS << *End;
+ }
+}
+
+LLVM_DUMP_METHOD
+static void printLivenessInfo(raw_ostream &OS,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ const LiveIntervals *LIS) {
+ const auto BB = Begin->getParent();
+ const auto &MRI = BB->getParent()->getRegInfo();
+
+ const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
+ OS << "LIn RP: ";
+ getRegPressure(MRI, LiveIns).print(OS);
+
+ const auto BottomMI = End == BB->end() ? std::prev(End) : End;
+ const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
+ OS << "LOt RP: ";
+ getRegPressure(MRI, LiveOuts).print(OS);
+}
+
+LLVM_DUMP_METHOD
+void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ for (const auto R : Regions) {
+ OS << "Region to schedule ";
+ printRegion(OS, R->Begin, R->End, LIS, 1);
+ printLivenessInfo(OS, R->Begin, R->End, LIS);
+ OS << "Max RP: ";
+ R->MaxPressure.print(OS, &ST);
+ }
+}
+
+LLVM_DUMP_METHOD
+void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
+ const Region *R,
+ const GCNRegPressure &RP) const {
+ OS << "\nAfter scheduling ";
+ printRegion(OS, R->Begin, R->End, LIS);
+ printSchedRP(OS, R->MaxPressure, RP);
+ OS << '\n';
+}
+
+LLVM_DUMP_METHOD
+void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
+ const GCNRegPressure &Before,
+ const GCNRegPressure &After) const {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ OS << "RP before: ";
+ Before.print(OS, &ST);
+ OS << "RP after: ";
+ After.print(OS, &ST);
+}
+
+#endif
+
+// DAG builder helper
+class GCNIterativeScheduler::BuildDAG {
+ GCNIterativeScheduler &Sch;
+ SmallVector<SUnit*, 8> TopRoots;
+public:
+ BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
+ : Sch(_Sch) {
+ auto BB = R.Begin->getParent();
+ Sch.BaseClass::startBlock(BB);
+ Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
+
+ Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
+ /*TrackLaneMask*/true);
+ Sch.Topo.InitDAGTopologicalSorting();
+
+ SmallVector<SUnit*, 8> BotRoots;
+ Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
+ }
+ ~BuildDAG() {
+ Sch.BaseClass::exitRegion();
+ Sch.BaseClass::finishBlock();
+ }
+ ArrayRef<const SUnit*> getTopRoots() const {
+ return TopRoots;
+ }
+};
+
+class GCNIterativeScheduler::OverrideLegacyStrategy {
+ GCNIterativeScheduler &Sch;
+ Region &Rgn;
+ std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
+ GCNRegPressure SaveMaxRP;
+public:
+ OverrideLegacyStrategy(Region &R,
+ MachineSchedStrategy &OverrideStrategy,
+ GCNIterativeScheduler &_Sch)
+ : Sch(_Sch)
+ , Rgn(R)
+ , SaveSchedImpl(std::move(_Sch.SchedImpl))
+ , SaveMaxRP(R.MaxPressure) {
+ Sch.SchedImpl.reset(&OverrideStrategy);
+ auto BB = R.Begin->getParent();
+ Sch.BaseClass::startBlock(BB);
+ Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
+ }
+ ~OverrideLegacyStrategy() {
+ Sch.BaseClass::exitRegion();
+ Sch.BaseClass::finishBlock();
+ Sch.SchedImpl.release();
+ Sch.SchedImpl = std::move(SaveSchedImpl);
+ }
+ void schedule() {
+ assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
+ DEBUG(dbgs() << "\nScheduling ";
+ printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
+ Sch.BaseClass::schedule();
+
+ // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
+ Sch.RegionEnd = Rgn.End;
+ //assert(Rgn.End == Sch.RegionEnd);
+ Rgn.Begin = Sch.RegionBegin;
+ Rgn.MaxPressure.clear();
+ }
+ void restoreOrder() {
+ assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
+ // DAG SUnits are stored using original region's order
+ // so just use SUnits as the restoring schedule
+ Sch.scheduleRegion(Rgn, Sch.SUnits, SaveMaxRP);
+ }
+};
+
+// just a stub to make base class happy
+class SchedStrategyStub : public MachineSchedStrategy {
+public:
+ bool shouldTrackPressure() const override { return false; }
+ bool shouldTrackLaneMasks() const override { return false; }
+ void initialize(ScheduleDAGMI *DAG) override {}
+ SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
+ void schedNode(SUnit *SU, bool IsTopNode) override {}
+ void releaseTopNode(SUnit *SU) override {}
+ void releaseBottomNode(SUnit *SU) override {}
+};
+
+GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
+ StrategyKind S)
+ : BaseClass(C, llvm::make_unique<SchedStrategyStub>())
+ , Context(C)
+ , Strategy(S)
+ , UPTracker(*LIS) {
+}
+
+// returns max pressure for a region
+GCNRegPressure
+GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End)
+ const {
+ // For the purpose of pressure tracking, the bottom inst of the region should
+ // also be processed. End is either the BB end, a BB terminator inst or a sched
+ // boundary inst.
+ auto const BBEnd = Begin->getParent()->end();
+ auto const BottomMI = End == BBEnd ? std::prev(End) : End;
+
+ // scheduleRegions walks bottom to top, so it's likely we just get the next
+ // instruction to track
+ auto AfterBottomMI = std::next(BottomMI);
+ if (AfterBottomMI == BBEnd ||
+ &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
+ UPTracker.reset(*BottomMI);
+ } else {
+ assert(UPTracker.isValid());
+ }
+
+ for (auto I = BottomMI; I != Begin; --I)
+ UPTracker.recede(*I);
+
+ UPTracker.recede(*Begin);
+
+ assert(UPTracker.isValid() ||
+ (dbgs() << "Tracked region ",
+ printRegion(dbgs(), Begin, End, LIS), false));
+ return UPTracker.moveMaxPressure();
+}
+
+// returns max pressure for a tentative schedule
+template <typename Range> GCNRegPressure
+GCNIterativeScheduler::getSchedulePressure(const Region &R,
+ Range &&Schedule) const {
+ auto const BBEnd = R.Begin->getParent()->end();
+ GCNUpwardRPTracker RPTracker(*LIS);
+ if (R.End != BBEnd) {
+ // R.End points to the boundary instruction but the
+ // schedule doesn't include it
+ RPTracker.reset(*R.End);
+ RPTracker.recede(*R.End);
+ } else {
+ // R.End doesn't point to the boundary instruction
+ RPTracker.reset(*std::prev(BBEnd));
+ }
+ for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
+ RPTracker.recede(*getMachineInstr(*--I));
+ }
+ return RPTracker.moveMaxPressure();
+}
+
+void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) {
+ BaseClass::enterRegion(BB, Begin, End, NumRegionInstrs);
+ if (NumRegionInstrs > 2) {
+ Regions.push_back(
+ new (Alloc.Allocate())
+ Region { Begin, End, NumRegionInstrs,
+ getRegionPressure(Begin, End), nullptr });
+ }
+}
+
+void GCNIterativeScheduler::schedule() { // overridden
+ // do nothing
+ DEBUG(
+ printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
+ if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
+ dbgs() << "Max RP: ";
+ Regions.back()->MaxPressure.print(dbgs(), &MF.getSubtarget<SISubtarget>());
+ }
+ dbgs() << '\n';
+ );
+}
+
+void GCNIterativeScheduler::finalizeSchedule() { // overridden
+ if (Regions.empty())
+ return;
+ switch (Strategy) {
+ case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
+ case SCHEDULE_MINREGFORCED: scheduleMinReg(true); break;
+ case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
+ }
+}
+
+// Detach schedule from SUnits and interleave it with debug values.
+// Returned schedule becomes independent of DAG state.
+std::vector<MachineInstr*>
+GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
+ std::vector<MachineInstr*> Res;
+ Res.reserve(Schedule.size() * 2);
+
+ if (FirstDbgValue)
+ Res.push_back(FirstDbgValue);
+
+ const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
+ for (auto SU : Schedule) {
+ Res.push_back(SU->getInstr());
+ const auto &D = std::find_if(DbgB, DbgE, [SU](decltype(*DbgB) &P) {
+ return P.second == SU->getInstr();
+ });
+ if (D != DbgE)
+ Res.push_back(D->first);
+ }
+ return Res;
+}
+
+void GCNIterativeScheduler::setBestSchedule(Region &R,
+ ScheduleRef Schedule,
+ const GCNRegPressure &MaxRP) {
+ R.BestSchedule.reset(
+ new TentativeSchedule{ detachSchedule(Schedule), MaxRP });
+}
+
+void GCNIterativeScheduler::scheduleBest(Region &R) {
+ assert(R.BestSchedule.get() && "No schedule specified");
+ scheduleRegion(R, R.BestSchedule->Schedule, R.BestSchedule->MaxPressure);
+ R.BestSchedule.reset();
+}
+
+// minimal required region scheduler, works for ranges of SUnits*,
+// SUnits or MachineInstrs*
+template <typename Range>
+void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
+ const GCNRegPressure &MaxRP) {
+ assert(RegionBegin == R.Begin && RegionEnd == R.End);
+ assert(LIS != nullptr);
+#ifndef NDEBUG
+ const auto SchedMaxRP = getSchedulePressure(R, Schedule);
+#endif
+ auto BB = R.Begin->getParent();
+ auto Top = R.Begin;
+ for (const auto &I : Schedule) {
+ auto MI = getMachineInstr(I);
+ if (MI != &*Top) {
+ BB->remove(MI);
+ BB->insert(Top, MI);
+ if (!MI->isDebugValue())
+ LIS->handleMove(*MI, true);
+ }
+ if (!MI->isDebugValue()) {
+ // Reset read - undef flags and update them later.
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Op.setIsUndef(false);
+
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
+ /*IgnoreDead*/false);
+ // Adjust liveness and add missing dead+read-undef flags.
+ auto SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ }
+ Top = std::next(MI->getIterator());
+ }
+ RegionBegin = getMachineInstr(Schedule.front());
+
+ // Schedule consisting of MachineInstr* is considered 'detached'
+ // and already interleaved with debug values
+ if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
+ placeDebugValues();
+ // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
+ //assert(R.End == RegionEnd);
+ RegionEnd = R.End;
+ }
+
+ R.Begin = RegionBegin;
+ R.MaxPressure = MaxRP;
+
+#ifndef NDEBUG
+ const auto RegionMaxRP = getRegionPressure(R);
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+#endif
+ assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
+ || (dbgs() << "Max RP mismatch!!!\n"
+ "RP for schedule (calculated): ",
+ SchedMaxRP.print(dbgs(), &ST),
+ dbgs() << "RP for schedule (reported): ",
+ MaxRP.print(dbgs(), &ST),
+ dbgs() << "RP after scheduling: ",
+ RegionMaxRP.print(dbgs(), &ST),
+ false));
+}
+
+// Sort recorded regions by pressure - highest at the front
+void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ std::sort(Regions.begin(), Regions.end(),
+ [&ST, TargetOcc](const Region *R1, const Region *R2) {
+ return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
+ });
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Legacy MaxOccupancy Strategy
+
+// Tries to increase occupancy applying minreg scheduler for a sequence of
+// most demanding regions. Obtained schedules are saved as BestSchedule for a
+// region.
+// TargetOcc is the best achievable occupancy for a kernel.
+// Returns better occupancy on success or current occupancy on fail.
+// BestSchedules aren't deleted on fail.
+unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
+ // TODO: assert Regions are sorted descending by pressure
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+ DEBUG(dbgs() << "Trying to to improve occupancy, target = " << TargetOcc
+ << ", current = " << Occ << '\n');
+
+ auto NewOcc = TargetOcc;
+ for (auto R : Regions) {
+ if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
+ break;
+
+ DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
+ printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
+
+ BuildDAG DAG(*R, *this);
+ const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
+ const auto MaxRP = getSchedulePressure(*R, MinSchedule);
+ DEBUG(dbgs() << "Occupancy improvement attempt:\n";
+ printSchedRP(dbgs(), R->MaxPressure, MaxRP));
+
+ NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
+ if (NewOcc <= Occ)
+ break;
+
+ setBestSchedule(*R, MinSchedule, MaxRP);
+ }
+ DEBUG(dbgs() << "New occupancy = " << NewOcc
+ << ", prev occupancy = " << Occ << '\n');
+ return std::max(NewOcc, Occ);
+}
+
+void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
+ bool TryMaximizeOccupancy) {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+
+ sortRegionsByPressure(TgtOcc);
+ auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+
+ if (TryMaximizeOccupancy && Occ < TgtOcc)
+ Occ = tryMaximizeOccupancy(TgtOcc);
+
+ // This is really odd, but scheduling regions twice sometimes gives a
+ // performance improvement.
+ const int NumPasses = Occ < TgtOcc ? 2 : 1;
+
+ TgtOcc = std::min(Occ, TgtOcc);
+ DEBUG(dbgs() << "Scheduling using default scheduler, "
+ "target occupancy = " << TgtOcc << '\n');
+ GCNMaxOccupancySchedStrategy LStrgy(Context);
+
+ for (int I = 0; I < NumPasses; ++I) {
+ // Running the first pass with TargetOccupancy = 0 mimics the previous
+ // scheduling approach and, empirically, improves performance.
+ LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
+ for (auto R : Regions) {
+ OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
+
+ Ovr.schedule();
+ const auto RP = getRegionPressure(*R);
+ DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
+
+ if (RP.getOccupancy(ST) < TgtOcc) {
+ DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
+ if (R->BestSchedule.get() &&
+ R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
+ DEBUG(dbgs() << ", scheduling minimal register\n");
+ scheduleBest(*R);
+ } else {
+ DEBUG(dbgs() << ", restoring\n");
+ Ovr.restoreOrder();
+ assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
+ }
+ }
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Minimal Register Strategy
+
+void GCNIterativeScheduler::scheduleMinReg(bool force) {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+ sortRegionsByPressure(TgtOcc);
+
+ auto MaxPressure = Regions.front()->MaxPressure;
+ for (auto R : Regions) {
+ if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
+ break;
+
+ BuildDAG DAG(*R, *this);
+ const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
+
+ const auto RP = getSchedulePressure(*R, MinSchedule);
+ DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
+ dbgs() << "\nWarning: Pressure becomes worse after minreg!";
+ printSchedRP(dbgs(), R->MaxPressure, RP);
+ });
+
+ if (!force && MaxPressure.less(ST, RP, TgtOcc))
+ break;
+
+ scheduleRegion(*R, MinSchedule, RP);
+ DEBUG(printSchedResult(dbgs(), R, RP));
+
+ MaxPressure = RP;
+ }
+}
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.h b/lib/Target/AMDGPU/GCNIterativeScheduler.h
new file mode 100644
index 000000000000..df3afce21ebc
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.h
@@ -0,0 +1,118 @@
+//===--------- GCNIterativeScheduler.h - GCN Scheduler -*- C++ -*----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
+
+#include "GCNRegPressure.h"
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+class GCNIterativeScheduler : public ScheduleDAGMILive {
+ typedef ScheduleDAGMILive BaseClass;
+public:
+ enum StrategyKind {
+ SCHEDULE_MINREGONLY,
+ SCHEDULE_MINREGFORCED,
+ SCHEDULE_LEGACYMAXOCCUPANCY
+ };
+
+ GCNIterativeScheduler(MachineSchedContext *C,
+ StrategyKind S);
+
+ void schedule() override;
+
+ void enterRegion(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned RegionInstrs) override;
+
+ void finalizeSchedule() override;
+
+protected:
+
+ typedef ArrayRef<const SUnit*> ScheduleRef;
+
+ struct TentativeSchedule {
+ std::vector<MachineInstr*> Schedule;
+ GCNRegPressure MaxPressure;
+ };
+
+ struct Region {
+ // Fields except for BestSchedule are supposed to reflect the current IR state.
+ // `const` fields emphasize that they shouldn't change for any schedule.
+ MachineBasicBlock::iterator Begin;
+ // End is either a boundary instruction or end of basic block
+ const MachineBasicBlock::iterator End;
+ const unsigned NumRegionInstrs;
+ GCNRegPressure MaxPressure;
+
+ // best schedule for the region so far (not scheduled yet)
+ std::unique_ptr<TentativeSchedule> BestSchedule;
+ };
+
+ SpecificBumpPtrAllocator<Region> Alloc;
+ std::vector<Region*> Regions;
+
+ MachineSchedContext *Context;
+ const StrategyKind Strategy;
+ mutable GCNUpwardRPTracker UPTracker;
+
+ class BuildDAG;
+ class OverrideLegacyStrategy;
+
+ template <typename Range>
+ GCNRegPressure getSchedulePressure(const Region &R,
+ Range &&Schedule) const;
+
+ GCNRegPressure getRegionPressure(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) const;
+
+ GCNRegPressure getRegionPressure(const Region &R) const {
+ return getRegionPressure(R.Begin, R.End);
+ }
+
+ void setBestSchedule(Region &R,
+ ScheduleRef Schedule,
+ const GCNRegPressure &MaxRP = GCNRegPressure());
+
+ void scheduleBest(Region &R);
+
+ std::vector<MachineInstr*> detachSchedule(ScheduleRef Schedule) const;
+
+ void sortRegionsByPressure(unsigned TargetOcc);
+
+ template <typename Range>
+ void scheduleRegion(Region &R, Range &&Schedule,
+ const GCNRegPressure &MaxRP = GCNRegPressure());
+
+ unsigned tryMaximizeOccupancy(unsigned TargetOcc =
+ std::numeric_limits<unsigned>::max());
+
+ void scheduleLegacyMaxOccupancy(bool TryMaximizeOccupancy = true);
+ void scheduleMinReg(bool force = false);
+
+ void printRegions(raw_ostream &OS) const;
+ void printSchedResult(raw_ostream &OS,
+ const Region *R,
+ const GCNRegPressure &RP) const;
+ void printSchedRP(raw_ostream &OS,
+ const GCNRegPressure &Before,
+ const GCNRegPressure &After) const;
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
new file mode 100644
index 000000000000..c6d0f2179950
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -0,0 +1,266 @@
+//===----------------------- GCNMinRegStrategy.cpp - ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+class GCNMinRegScheduler {
+ struct Candidate : ilist_node<Candidate> {
+ const SUnit *SU;
+ int Priority;
+
+ Candidate(const SUnit *SU_, int Priority_ = 0)
+ : SU(SU_), Priority(Priority_) {}
+ };
+
+ SpecificBumpPtrAllocator<Candidate> Alloc;
+ typedef simple_ilist<Candidate> Queue;
+ Queue RQ; // Ready queue
+
+ std::vector<unsigned> NumPreds;
+
+ bool isScheduled(const SUnit *SU) const {
+ assert(!SU->isBoundaryNode());
+ return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max();
+ }
+
+ void setIsScheduled(const SUnit *SU) {
+ assert(!SU->isBoundaryNode());
+ NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max();
+ }
+
+ unsigned getNumPreds(const SUnit *SU) const {
+ assert(!SU->isBoundaryNode());
+ assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
+ return NumPreds[SU->NodeNum];
+ }
+
+ unsigned decNumPreds(const SUnit *SU) {
+ assert(!SU->isBoundaryNode());
+ assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
+ return --NumPreds[SU->NodeNum];
+ }
+
+ void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits);
+
+ int getReadySuccessors(const SUnit *SU) const;
+ int getNotReadySuccessors(const SUnit *SU) const;
+
+ template <typename Calc>
+ unsigned findMax(unsigned Num, Calc C);
+
+ Candidate* pickCandidate();
+
+ void bumpPredsPriority(const SUnit *SchedSU, int Priority);
+ void releaseSuccessors(const SUnit* SU, int Priority);
+
+public:
+ std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG);
+};
+
+void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
+ NumPreds.resize(SUnits.size());
+ for (unsigned I = 0; I < SUnits.size(); ++I)
+ NumPreds[I] = SUnits[I].NumPredsLeft;
+}
+
+int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const {
+ unsigned NumSchedSuccs = 0;
+ for (auto SDep : SU->Succs) {
+ bool wouldBeScheduled = true;
+ for (auto PDep : SDep.getSUnit()->Preds) {
+ auto PSU = PDep.getSUnit();
+ assert(!PSU->isBoundaryNode());
+ if (PSU != SU && !isScheduled(PSU)) {
+ wouldBeScheduled = false;
+ break;
+ }
+ }
+ NumSchedSuccs += wouldBeScheduled ? 1 : 0;
+ }
+ return NumSchedSuccs;
+}
+
+int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
+ return SU->Succs.size() - getReadySuccessors(SU);
+}
+
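+// Scans the first Num entries of RQ, moving candidates to the front as a
+// running maximum of C is updated; on return the candidates with the maximum
+// value of C occupy the front of the queue and their count (NumMax) is
+// returned.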
+template <typename Calc>
+unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
+ assert(!RQ.empty() && Num <= RQ.size());
+ typedef decltype(C(*RQ.begin())) T;
+ T Max = std::numeric_limits<T>::min();
+ unsigned NumMax = 0;
+ for (auto I = RQ.begin(); Num; --Num) {
+ T Cur = C(*I);
+ if (Cur >= Max) {
+ if (Cur > Max) {
+ Max = Cur;
+ NumMax = 1;
+ } else
+ ++NumMax;
+ auto &Cand = *I++;
+ RQ.remove(Cand);
+ RQ.push_front(Cand);
+ continue;
+ }
+ ++I;
+ }
+ return NumMax;
+}
+
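+// pickCandidate() narrows the ready queue in stages: highest Priority first,
+// then the candidate leaving the fewest successors not-ready, then the one
+// making the most successors ready, and finally the lowest NodeNum
+// (program order).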
+GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
+ do {
+ unsigned Num = RQ.size();
+ if (Num == 1) break;
+
+ DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
+ if (Num == 1) break;
+
+ DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
+ << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) {
+ auto SU = C.SU;
+ int Res = getNotReadySuccessors(SU);
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would leave "
+ << Res << " successors non-ready, metric = " << -Res << '\n');
+ return -Res;
+ });
+ if (Num == 1) break;
+
+ DEBUG(dbgs() << "\nSelecting most producing candidate among "
+ << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) {
+ auto SU = C.SU;
+ auto Res = getReadySuccessors(SU);
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready "
+ << Res << " successors, metric = " << Res << '\n');
+ return Res;
+ });
+ if (Num == 1) break;
+
+ Num = Num ? Num : RQ.size();
+ DEBUG(dbgs() << "\nCan't find best candidate, selecting in program order among "
+ << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
+ assert(Num == 1);
+ } while (false);
+
+ return &RQ.front();
+}
+
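+// Called when a just-scheduled SU made none of its successors ready: raises
+// the priority of every unscheduled predecessor (transitively) of its data
+// successors, so pickCandidate() prefers the chains that unblock them.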
+void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
+ SmallPtrSet<const SUnit*, 32> Set;
+ for (const auto &S : SchedSU->Succs) {
+ if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
+ S.getKind() != SDep::Data)
+ continue;
+ for (const auto &P : S.getSUnit()->Preds) {
+ auto PSU = P.getSUnit();
+ assert(!PSU->isBoundaryNode());
+ if (PSU != SchedSU && !isScheduled(PSU)) {
+ Set.insert(PSU);
+ }
+ }
+ }
+ SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end());
+ while (!Worklist.empty()) {
+ auto SU = Worklist.pop_back_val();
+ assert(!SU->isBoundaryNode());
+ for (const auto &P : SU->Preds) {
+ if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) &&
+ Set.insert(P.getSUnit()).second)
+ Worklist.push_back(P.getSUnit());
+ }
+ }
+ DEBUG(dbgs() << "Set the ready-queue predecessors of SU(" << SchedSU->NodeNum
+ << ")'s non-ready successors to priority " << Priority << ": ");
+ const auto SetEnd = Set.end();
+ for (auto &C : RQ) {
+ if (Set.find(C.SU) != SetEnd) {
+ C.Priority = Priority;
+ DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
+void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
+ for (const auto &S : SU->Succs) {
+ auto SuccSU = S.getSUnit();
+ if (S.isWeak())
+ continue;
+ assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
+ if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
+ RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority));
+ }
+}
+
+std::vector<const SUnit*>
+GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG) {
+ const auto &SUnits = DAG.SUnits;
+ std::vector<const SUnit*> Schedule;
+ Schedule.reserve(SUnits.size());
+
+ initNumPreds(SUnits);
+
+ int StepNo = 0;
+
+ for (auto SU : TopRoots) {
+ RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo));
+ }
+ releaseSuccessors(&DAG.EntrySU, StepNo);
+
+ while (!RQ.empty()) {
+ DEBUG(
+ dbgs() << "\n=== Picking candidate, Step = " << StepNo << "\n"
+ "Ready queue:";
+ for (auto &C : RQ)
+ dbgs() << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
+ dbgs() << '\n';
+ );
+
+ auto C = pickCandidate();
+ assert(C);
+ RQ.remove(*C);
+ auto SU = C->SU;
+ DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+
+ releaseSuccessors(SU, StepNo);
+ Schedule.push_back(SU);
+ setIsScheduled(SU);
+
+ if (getReadySuccessors(SU) == 0)
+ bumpPredsPriority(SU, StepNo);
+
+ ++StepNo;
+ }
+ assert(SUnits.size() == Schedule.size());
+
+ return Schedule;
+}
+
+namespace llvm {
+std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG) {
+ GCNMinRegScheduler S;
+ return S.schedule(TopRoots, DAG);
+}
+}
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
new file mode 100644
index 000000000000..4ecfa118fb27
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -0,0 +1,355 @@
+//===------------------------- GCNRegPressure.cpp - -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNRegPressure.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void llvm::printLivesAt(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ dbgs() << "Live regs at " << SI << ": "
+ << *LIS.getInstructionFromIndex(SI);
+ unsigned Num = 0;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ const auto &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ bool firstTime = true;
+ for (const auto &S : LI.subranges()) {
+ if (!S.liveAt(SI)) continue;
+ if (firstTime) {
+ dbgs() << " " << PrintReg(Reg, MRI.getTargetRegisterInfo())
+ << '\n';
+ firstTime = false;
+ }
+ dbgs() << " " << S << '\n';
+ ++Num;
+ }
+ } else if (LI.liveAt(SI)) {
+ dbgs() << " " << LI << '\n';
+ ++Num;
+ }
+ }
+ if (!Num) dbgs() << " <none>\n";
+}
+
+static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
+ const GCNRPTracker::LiveRegSet &S2) {
+ if (S1.size() != S2.size())
+ return false;
+
+ for (const auto &P : S1) {
+ auto I = S2.find(P.first);
+ if (I == S2.end() || I->second != P.second)
+ return false;
+ }
+ return true;
+}
+
+static GCNRPTracker::LiveRegSet
+stripEmpty(const GCNRPTracker::LiveRegSet &LR) {
+ GCNRPTracker::LiveRegSet Res;
+ for (const auto &P : LR) {
+ if (P.second.any())
+ Res.insert(P);
+ }
+ return Res;
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// GCNRegPressure
+
+unsigned GCNRegPressure::getRegKind(unsigned Reg,
+ const MachineRegisterInfo &MRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const auto RC = MRI.getRegClass(Reg);
+ auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+ return STI->isSGPRClass(RC) ?
+ (RC->getSize() == 4 ? SGPR32 : SGPR_TUPLE) :
+ (RC->getSize() == 4 ? VGPR32 : VGPR_TUPLE);
+}
+
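+// Adjusts the pressure for Reg when its live mask changes from PrevMask to
+// NewMask: the 32-bit counters change by the number of lanes that became live
+// (or dead), while a tuple's weight is added or removed only when the
+// register transitions between fully dead and live.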
+void GCNRegPressure::inc(unsigned Reg,
+ LaneBitmask PrevMask,
+ LaneBitmask NewMask,
+ const MachineRegisterInfo &MRI) {
+ if (NewMask == PrevMask)
+ return;
+
+ int Sign = 1;
+ if (NewMask < PrevMask) {
+ std::swap(NewMask, PrevMask);
+ Sign = -1;
+ }
+#ifndef NDEBUG
+ const auto MaxMask = MRI.getMaxLaneMaskForVReg(Reg);
+#endif
+ switch (auto Kind = getRegKind(Reg, MRI)) {
+ case SGPR32:
+ case VGPR32:
+ assert(PrevMask.none() && NewMask == MaxMask);
+ Value[Kind] += Sign;
+ break;
+
+ case SGPR_TUPLE:
+ case VGPR_TUPLE:
+ assert(NewMask < MaxMask || NewMask == MaxMask);
+ assert(PrevMask < NewMask);
+
+ Value[Kind == SGPR_TUPLE ? SGPR32 : VGPR32] +=
+ Sign * countPopulation((~PrevMask & NewMask).getAsInteger());
+
+ if (PrevMask.none()) {
+ assert(NewMask.any());
+ Value[Kind] += Sign * MRI.getPressureSets(Reg).getWeight();
+ }
+ break;
+
+ default: llvm_unreachable("Unknown register kind");
+ }
+}
+
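+// Returns true if this pressure is preferable to O under MaxOccupancy: higher
+// achievable occupancy wins; on a tie, smaller tuple weights and then smaller
+// 32-bit register counts win, with the register file that limits occupancy
+// compared first.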
+bool GCNRegPressure::less(const SISubtarget &ST,
+ const GCNRegPressure& O,
+ unsigned MaxOccupancy) const {
+ const auto SGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumSGPRs(getSGRPNum()));
+ const auto VGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumVGPRs(getVGRPNum()));
+ const auto OtherSGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumSGPRs(O.getSGRPNum()));
+ const auto OtherVGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumVGPRs(O.getVGRPNum()));
+
+ const auto Occ = std::min(SGPROcc, VGPROcc);
+ const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
+ if (Occ != OtherOcc)
+ return Occ > OtherOcc;
+
+ bool SGPRImportant = SGPROcc < VGPROcc;
+ const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;
+
+ // if both pressures disagree on what is more important compare vgprs
+ if (SGPRImportant != OtherSGPRImportant) {
+ SGPRImportant = false;
+ }
+
+ // compare large regs pressure
+ bool SGPRFirst = SGPRImportant;
+ for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
+ if (SGPRFirst) {
+ auto SW = getSGPRTuplesWeight();
+ auto OtherSW = O.getSGPRTuplesWeight();
+ if (SW != OtherSW)
+ return SW < OtherSW;
+ } else {
+ auto VW = getVGPRTuplesWeight();
+ auto OtherVW = O.getVGPRTuplesWeight();
+ if (VW != OtherVW)
+ return VW < OtherVW;
+ }
+ }
+ return SGPRImportant ? (getSGRPNum() < O.getSGRPNum()):
+ (getVGRPNum() < O.getVGRPNum());
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const {
+ OS << "VGPRs: " << getVGRPNum();
+ if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGRPNum()) << ')';
+ OS << ", SGPRs: " << getSGRPNum();
+ if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGRPNum()) << ')';
+ OS << ", LVGPR WT: " << getVGPRTuplesWeight()
+ << ", LSGPR WT: " << getSGPRTuplesWeight();
+ if (ST) OS << " -> Occ: " << getOccupancy(*ST);
+ OS << '\n';
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// GCNRPTracker
+
+LaneBitmask llvm::getLiveLaneMask(unsigned Reg,
+ SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ assert(!MRI.reg_nodbg_empty(Reg));
+ LaneBitmask LiveMask;
+ const auto &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ for (const auto &S : LI.subranges())
+ if (S.liveAt(SI)) {
+ LiveMask |= S.LaneMask;
+ assert(LiveMask < MRI.getMaxLaneMaskForVReg(Reg) ||
+ LiveMask == MRI.getMaxLaneMaskForVReg(Reg));
+ }
+ } else if (LI.liveAt(SI)) {
+ LiveMask = MRI.getMaxLaneMaskForVReg(Reg);
+ }
+ return LiveMask;
+}
+
+GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ GCNRPTracker::LiveRegSet LiveRegs;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ auto Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
+ if (LiveMask.any())
+ LiveRegs[Reg] = LiveMask;
+ }
+ return LiveRegs;
+}
+
+void GCNUpwardRPTracker::reset(const MachineInstr &MI) {
+ MRI = &MI.getParent()->getParent()->getRegInfo();
+ LiveRegs = getLiveRegsAfter(MI, LIS);
+ MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
+}
+
+LaneBitmask GCNUpwardRPTracker::getDefRegMask(const MachineOperand &MO) const {
+ assert(MO.isDef() && MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+
+ // We don't rely on the read-undef flag because, when tracking a tentative
+ // schedule, it isn't set correctly yet. This still works correctly, however,
+ // since the use mask has already been tracked using LIS.
+ return MO.getSubReg() == 0 ?
+ MRI->getMaxLaneMaskForVReg(MO.getReg()) :
+ MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
+}
+
+LaneBitmask GCNUpwardRPTracker::getUsedRegMask(const MachineOperand &MO) const {
+ assert(MO.isUse() && MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+
+ if (auto SubReg = MO.getSubReg())
+ return MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
+
+ auto MaxMask = MRI->getMaxLaneMaskForVReg(MO.getReg());
+ if (MaxMask.getAsInteger() == 1) // cannot have subregs
+ return MaxMask;
+
+ // For a tentative schedule LIS isn't updated yet, but the live mask should
+ // remain the same on any schedule. Subreg defs can be reordered, but they
+ // must all dominate their uses anyway.
+ auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
+ return getLiveLaneMask(MO.getReg(), SI, LIS, *MRI);
+}
+
+void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
+ assert(MRI && "call reset first");
+
+ LastTrackedMI = &MI;
+
+ if (MI.isDebugValue())
+ return;
+
+ // process all defs first to ensure early clobbers are handled correctly
+ // iterating over operands() to catch implicit defs
+ for (const auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ auto Reg = MO.getReg();
+ auto &LiveMask = LiveRegs[Reg];
+ auto PrevMask = LiveMask;
+ LiveMask &= ~getDefRegMask(MO);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ }
+
+ // then all uses
+ for (const auto &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ auto Reg = MO.getReg();
+ auto &LiveMask = LiveRegs[Reg];
+ auto PrevMask = LiveMask;
+ LiveMask |= getUsedRegMask(MO);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ }
+
+ MaxPressure = max(MaxPressure, CurPressure);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
+ const GCNRPTracker::LiveRegSet &TrackedLR,
+ const TargetRegisterInfo *TRI) {
+ for (auto const &P : TrackedLR) {
+ auto I = LISLR.find(P.first);
+ if (I == LISLR.end()) {
+ dbgs() << " " << PrintReg(P.first, TRI)
+ << ":L" << PrintLaneMask(P.second)
+ << " isn't found in LIS reported set\n";
+ }
+ else if (I->second != P.second) {
+ dbgs() << " " << PrintReg(P.first, TRI)
+ << " masks doesn't match: LIS reported "
+ << PrintLaneMask(I->second)
+ << ", tracked "
+ << PrintLaneMask(P.second)
+ << '\n';
+ }
+ }
+ for (auto const &P : LISLR) {
+ auto I = TrackedLR.find(P.first);
+ if (I == TrackedLR.end()) {
+ dbgs() << " " << PrintReg(P.first, TRI)
+ << ":L" << PrintLaneMask(P.second)
+ << " isn't found in tracked set\n";
+ }
+ }
+}
+
+bool GCNUpwardRPTracker::isValid() const {
+ const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
+ const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
+ const auto TrackedLR = stripEmpty(LiveRegs);
+
+ if (!isEqual(LISLR, TrackedLR)) {
+ dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
+ " LIS reported livesets mismatch:\n";
+ printLivesAt(SI, LIS, *MRI);
+ reportMismatch(LISLR, TrackedLR, MRI->getTargetRegisterInfo());
+ return false;
+ }
+
+ auto LISPressure = getRegPressure(*MRI, LISLR);
+ if (LISPressure != CurPressure) {
+ dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: ";
+ CurPressure.print(dbgs());
+ dbgs() << "LIS rpt: ";
+ LISPressure.print(dbgs());
+ return false;
+ }
+ return true;
+}
+
+#endif
diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h
new file mode 100644
index 000000000000..82e76a7bfddc
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNRegPressure.h
@@ -0,0 +1,170 @@
+//===---------------------- GCNRegPressure.h -*- C++ -*--------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
+
+#include "AMDGPUSubtarget.h"
+
+#include <limits>
+
+namespace llvm {
+
+struct GCNRegPressure {
+ enum RegKind {
+ SGPR32,
+ SGPR_TUPLE,
+ VGPR32,
+ VGPR_TUPLE,
+ TOTAL_KINDS
+ };
+
+ GCNRegPressure() {
+ clear();
+ }
+
+ bool empty() const { return getSGRPNum() == 0 && getVGRPNum() == 0; }
+
+ void clear() { std::fill(&Value[0], &Value[TOTAL_KINDS], 0); }
+
+ unsigned getSGRPNum() const { return Value[SGPR32]; }
+ unsigned getVGRPNum() const { return Value[VGPR32]; }
+
+ unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
+ unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
+
+ unsigned getOccupancy(const SISubtarget &ST) const {
+ return std::min(ST.getOccupancyWithNumSGPRs(getSGRPNum()),
+ ST.getOccupancyWithNumVGPRs(getVGRPNum()));
+ }
+
+ void inc(unsigned Reg,
+ LaneBitmask PrevMask,
+ LaneBitmask NewMask,
+ const MachineRegisterInfo &MRI);
+
+ bool higherOccupancy(const SISubtarget &ST, const GCNRegPressure& O) const {
+ return getOccupancy(ST) > O.getOccupancy(ST);
+ }
+
+ bool less(const SISubtarget &ST, const GCNRegPressure& O,
+ unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
+
+ bool operator==(const GCNRegPressure &O) const {
+ return std::equal(&Value[0], &Value[TOTAL_KINDS], O.Value);
+ }
+
+ bool operator!=(const GCNRegPressure &O) const {
+ return !(*this == O);
+ }
+
+ void print(raw_ostream &OS, const SISubtarget *ST=nullptr) const;
+ void dump() const { print(dbgs()); }
+
+private:
+ unsigned Value[TOTAL_KINDS];
+
+ static unsigned getRegKind(unsigned Reg, const MachineRegisterInfo &MRI);
+
+ friend GCNRegPressure max(const GCNRegPressure &P1,
+ const GCNRegPressure &P2);
+};
+
+inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
+ GCNRegPressure Res;
+ for (unsigned I = 0; I < GCNRegPressure::TOTAL_KINDS; ++I)
+ Res.Value[I] = std::max(P1.Value[I], P2.Value[I]);
+ return Res;
+}
+
+class GCNRPTracker {
+public:
+ typedef DenseMap<unsigned, LaneBitmask> LiveRegSet;
+
+protected:
+ LiveRegSet LiveRegs;
+ GCNRegPressure CurPressure, MaxPressure;
+ const MachineInstr *LastTrackedMI = nullptr;
+ mutable const MachineRegisterInfo *MRI = nullptr;
+ GCNRPTracker() {}
+public:
+ // live regs for the current state
+ const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
+ const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
+
+ // returns MaxPressure, resetting it
+ decltype(MaxPressure) moveMaxPressure() {
+ auto Res = MaxPressure;
+ MaxPressure.clear();
+ return Res;
+ }
+ decltype(LiveRegs) moveLiveRegs() {
+ return std::move(LiveRegs);
+ }
+};
+
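+// Typical use (a sketch): call reset(*LastMI) to seed the live set just below
+// the range, then recede(*MI) for each instruction walking upward;
+// moveMaxPressure() then yields the maximum pressure seen and getLiveRegs()
+// the live registers above the last receded instruction.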
+class GCNUpwardRPTracker : public GCNRPTracker {
+ const LiveIntervals &LIS;
+ LaneBitmask getDefRegMask(const MachineOperand &MO) const;
+ LaneBitmask getUsedRegMask(const MachineOperand &MO) const;
+public:
+ GCNUpwardRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
+ // Reset the tracker to the point just below MI,
+ // filling the live regs at this point using LIS.
+ void reset(const MachineInstr &MI);
+
+ // move to the state just above the MI
+ void recede(const MachineInstr &MI);
+
+ // Checks whether the tracker's state after receding MI corresponds
+ // to the one reported by LIS.
+ bool isValid() const;
+};
+
+LaneBitmask getLiveLaneMask(unsigned Reg,
+ SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
+inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
+ MI.getParent()->getParent()->getRegInfo());
+}
+
+inline GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ return getLiveRegs(LIS.getInstructionIndex(MI).getBaseIndex(), LIS,
+ MI.getParent()->getParent()->getRegInfo());
+}
+
+template <typename Range>
+GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
+ Range &&LiveRegs) {
+ GCNRegPressure Res;
+ for (const auto &RM : LiveRegs)
+ Res.inc(RM.first, LaneBitmask::getNone(), RM.second, MRI);
+ return Res;
+}
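+
+// For example (a sketch), the pressure just before an instruction can be
+// computed as getRegPressure(MRI, getLiveRegsBefore(MI, LIS)).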
+
+void printLivesAt(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 2f88033c807f..ea305a92fc60 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -18,6 +18,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/MathExtras.h"
#define DEBUG_TYPE "misched"
@@ -25,7 +26,7 @@ using namespace llvm;
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C) :
- GenericScheduler(C) { }
+ GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }
static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
const MachineFunction &MF) {
@@ -35,18 +36,46 @@ static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
ST.getOccupancyWithNumVGPRs(VGPRs));
return std::min(MinRegOccupancy,
- ST.getOccupancyWithLocalMemSize(MFI->getLDSSize()));
+ ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
+ *MF.getFunction()));
+}
+
+void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
+ GenericScheduler::initialize(DAG);
+
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+
+ MF = &DAG->MF;
+
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+
+ // FIXME: This is also necessary, because some passes that run after
+ // scheduling and before regalloc increase register pressure.
+ const int ErrorMargin = 3;
+
+ SGPRExcessLimit = Context->RegClassInfo
+ ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
+ VGPRExcessLimit = Context->RegClassInfo
+ ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
+ if (TargetOccupancy) {
+ SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
+ VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
+ } else {
+ SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
+ SRI->getSGPRPressureSet());
+ VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
+ SRI->getVGPRPressureSet());
+ }
+
+ SGPRCriticalLimit -= ErrorMargin;
+ VGPRCriticalLimit -= ErrorMargin;
}
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop, const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
- int SGPRPressure,
- int VGPRPressure,
- int SGPRExcessLimit,
- int VGPRExcessLimit,
- int SGPRCriticalLimit,
- int VGPRCriticalLimit) {
+ unsigned SGPRPressure,
+ unsigned VGPRPressure) {
Cand.SU = SU;
Cand.AtTop = AtTop;
@@ -66,8 +95,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
}
- int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
- int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+ unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+ unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
@@ -77,7 +106,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// only for VGPRs or only for SGPRs.
// FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
- const int MaxVGPRPressureInc = 16;
+ const unsigned MaxVGPRPressureInc = 16;
bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
@@ -86,11 +115,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// to increase the likelihood we don't go over the limits. We should improve
// the analysis to look through dependencies to find the path with the least
// register pressure.
- // FIXME: This is also necessary, because some passes that run after
- // scheduling and before regalloc increase register pressure.
- const int ErrorMargin = 3;
- VGPRExcessLimit -= ErrorMargin;
- SGPRExcessLimit -= ErrorMargin;
// We only need to update the RPDelata for instructions that increase
// register pressure. Instructions that decrease or keep reg pressure
@@ -103,7 +127,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
- Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit);
+ Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
}
// Register pressure is considered 'CRITICAL' if it is approaching a value
@@ -111,9 +135,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
// has the same cost, so we don't need to prefer one over the other.
- VGPRCriticalLimit -= ErrorMargin;
- SGPRCriticalLimit -= ErrorMargin;
-
int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
@@ -134,27 +155,16 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
- const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
- unsigned SGPRExcessLimit =
- Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
- unsigned VGPRExcessLimit =
- Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
- unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
- unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
- unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
-
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
- SGPRPressure, VGPRPressure,
- SGPRExcessLimit, VGPRExcessLimit,
- SGPRCriticalLimit, VGPRCriticalLimit);
+ SGPRPressure, VGPRPressure);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
@@ -167,16 +177,6 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
}
}
-static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
- switch (Reason) {
- default:
- return Reason;
- case GenericSchedulerBase::RegCritical:
- case GenericSchedulerBase::RegExcess:
- return -Reason;
- }
-}
-
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
@@ -224,9 +224,9 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
// Pick best from BotCand and TopCand.
DEBUG(
dbgs() << "Top Cand: ";
- traceCandidate(BotCand);
- dbgs() << "Bot Cand: ";
traceCandidate(TopCand);
+ dbgs() << "Bot Cand: ";
+ traceCandidate(BotCand);
);
SchedCandidate Cand;
if (TopCand.Reason == BotCand.Reason) {
@@ -249,9 +249,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
} else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
Cand = BotCand;
} else {
- int TopRank = getBidirectionalReasonRank(TopCand.Reason);
- int BotRank = getBidirectionalReasonRank(BotCand.Reason);
- if (TopRank > BotRank) {
+ if (BotCand.Reason > TopCand.Reason) {
Cand = TopCand;
} else {
Cand = BotCand;
@@ -310,3 +308,255 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
return SU;
}
+
+GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S) :
+ ScheduleDAGMILive(C, std::move(S)),
+ ST(MF.getSubtarget<SISubtarget>()),
+ MFI(*MF.getInfo<SIMachineFunctionInfo>()),
+ StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(),
+ *MF.getFunction())),
+ MinOccupancy(StartingOccupancy), Stage(0) {
+
+ DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
+}
+
+void GCNScheduleDAGMILive::schedule() {
+ std::vector<MachineInstr*> Unsched;
+ Unsched.reserve(NumRegionInstrs);
+ for (auto &I : *this)
+ Unsched.push_back(&I);
+
+ std::pair<unsigned, unsigned> PressureBefore;
+ if (LIS) {
+ DEBUG(dbgs() << "Pressure before scheduling:\n");
+ discoverLiveIns();
+ PressureBefore = getRealRegPressure();
+ }
+
+ ScheduleDAGMILive::schedule();
+ if (Stage == 0)
+ Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
+
+ if (!LIS)
+ return;
+
+ // Check the results of scheduling.
+ GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
+ DEBUG(dbgs() << "Pressure after scheduling:\n");
+ auto PressureAfter = getRealRegPressure();
+ LiveIns.clear();
+
+ if (PressureAfter.first <= S.SGPRCriticalLimit &&
+ PressureAfter.second <= S.VGPRCriticalLimit) {
+ DEBUG(dbgs() << "Pressure in desired limits, done.\n");
+ return;
+ }
+ unsigned WavesAfter = getMaxWaves(PressureAfter.first,
+ PressureAfter.second, MF);
+ unsigned WavesBefore = getMaxWaves(PressureBefore.first,
+ PressureBefore.second, MF);
+ DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore <<
+ ", after " << WavesAfter << ".\n");
+
+ // We could not keep current target occupancy because of the just scheduled
+ // region. Record new occupancy for next scheduling cycle.
+ unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
+ if (NewOccupancy < MinOccupancy) {
+ MinOccupancy = NewOccupancy;
+ DEBUG(dbgs() << "Occupancy lowered for the function to "
+ << MinOccupancy << ".\n");
+ }
+
+ if (WavesAfter >= WavesBefore)
+ return;
+
+ DEBUG(dbgs() << "Attempting to revert scheduling.\n");
+ RegionEnd = RegionBegin;
+ for (MachineInstr *MI : Unsched) {
+ if (MI->getIterator() != RegionEnd) {
+ BB->remove(MI);
+ BB->insert(RegionEnd, MI);
+ LIS->handleMove(*MI, true);
+ }
+ // Reset read-undef flags and update them later.
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Op.setIsUndef(false);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+ RegionEnd = MI->getIterator();
+ ++RegionEnd;
+ DEBUG(dbgs() << "Scheduling " << *MI);
+ }
+ RegionBegin = Unsched.front()->getIterator();
+ if (Stage == 0)
+ Regions.back() = std::make_pair(RegionBegin, RegionEnd);
+
+ placeDebugValues();
+}
+
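+// Updates PrevMask to NewMask and adjusts the SGPR/VGPR counts by the number
+// of lanes added or removed, attributed to Reg's register class.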
+static inline void setMask(const MachineRegisterInfo &MRI,
+ const SIRegisterInfo *SRI, unsigned Reg,
+ LaneBitmask &PrevMask, LaneBitmask NewMask,
+ unsigned &SGPRs, unsigned &VGPRs) {
+ int NewRegs = countPopulation(NewMask.getAsInteger()) -
+ countPopulation(PrevMask.getAsInteger());
+ if (SRI->isSGPRReg(MRI, Reg))
+ SGPRs += NewRegs;
+ if (SRI->isVGPR(MRI, Reg))
+ VGPRs += NewRegs;
+ assert ((int)SGPRs >= 0 && (int)VGPRs >= 0);
+ PrevMask = NewMask;
+}
+
+void GCNScheduleDAGMILive::discoverLiveIns() {
+ unsigned SGPRs = 0;
+ unsigned VGPRs = 0;
+
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+ SlotIndex SI = LIS->getInstructionIndex(*begin()).getBaseIndex();
+ assert (SI.isValid());
+
+ DEBUG(dbgs() << "Region live-ins:");
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ LaneBitmask LaneMask = LaneBitmask::getNone();
+ if (LI.hasSubRanges()) {
+ for (const auto &S : LI.subranges())
+ if (S.liveAt(SI))
+ LaneMask |= S.LaneMask;
+ } else if (LI.liveAt(SI)) {
+ LaneMask = MRI.getMaxLaneMaskForVReg(Reg);
+ }
+
+ if (LaneMask.any()) {
+ setMask(MRI, SRI, Reg, LiveIns[Reg], LaneMask, SGPRs, VGPRs);
+
+ DEBUG(dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':'
+ << PrintLaneMask(LiveIns[Reg]));
+ }
+ }
+
+ LiveInPressure = std::make_pair(SGPRs, VGPRs);
+
+ DEBUG(dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs
+ << "\nVGPR = " << VGPRs << '\n');
+}
+
+std::pair<unsigned, unsigned>
+GCNScheduleDAGMILive::getRealRegPressure() const {
+ unsigned SGPRs, MaxSGPRs, VGPRs, MaxVGPRs;
+ SGPRs = MaxSGPRs = LiveInPressure.first;
+ VGPRs = MaxVGPRs = LiveInPressure.second;
+
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+ DenseMap<unsigned, LaneBitmask> LiveRegs(LiveIns);
+
+ for (const MachineInstr &MI : *this) {
+ if (MI.isDebugValue())
+ continue;
+ SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex();
+ assert (SI.isValid());
+
+ // Remove dead registers or mask bits.
+ for (auto &It : LiveRegs) {
+ if (It.second.none())
+ continue;
+ const LiveInterval &LI = LIS->getInterval(It.first);
+ if (LI.hasSubRanges()) {
+ for (const auto &S : LI.subranges())
+ if (!S.liveAt(SI))
+ setMask(MRI, SRI, It.first, It.second, It.second & ~S.LaneMask,
+ SGPRs, VGPRs);
+ } else if (!LI.liveAt(SI)) {
+ setMask(MRI, SRI, It.first, It.second, LaneBitmask::getNone(),
+ SGPRs, VGPRs);
+ }
+ }
+
+ // Add new registers or mask bits.
+ for (const auto &MO : MI.defs()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ LaneBitmask &LM = LiveRegs[Reg];
+ setMask(MRI, SRI, Reg, LM, LM | LaneMask, SGPRs, VGPRs);
+ }
+ MaxSGPRs = std::max(MaxSGPRs, SGPRs);
+ MaxVGPRs = std::max(MaxVGPRs, VGPRs);
+ }
+
+ DEBUG(dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs
+ << "\nVGPR = " << MaxVGPRs << '\n');
+
+ return std::make_pair(MaxSGPRs, MaxVGPRs);
+}
+
+void GCNScheduleDAGMILive::finalizeSchedule() {
+ // Retry function scheduling if we found resulting occupancy and it is
+ // lower than used for first pass scheduling. This will give more freedom
+ // to schedule low register pressure blocks.
+ // Code is partially copied from MachineSchedulerBase::scheduleRegions().
+
+ if (!LIS || StartingOccupancy <= MinOccupancy)
+ return;
+
+ DEBUG(dbgs() << "Retrying function scheduling with lowest recorded occupancy "
+ << MinOccupancy << ".\n");
+
+ Stage++;
+ GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
+ S.setTargetOccupancy(MinOccupancy);
+
+ MachineBasicBlock *MBB = nullptr;
+ for (auto Region : Regions) {
+ RegionBegin = Region.first;
+ RegionEnd = Region.second;
+
+ if (RegionBegin->getParent() != MBB) {
+ if (MBB) finishBlock();
+ MBB = RegionBegin->getParent();
+ startBlock(MBB);
+ }
+
+ unsigned NumRegionInstrs = std::distance(begin(), end());
+ enterRegion(MBB, begin(), end(), NumRegionInstrs);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (begin() == end() || begin() == std::prev(end())) {
+ exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(dbgs() << MF.getName()
+ << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+ << "\n From: " << *begin() << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+
+ schedule();
+
+ exitRegion();
+ }
+ finishBlock();
+ LiveIns.shrink_and_clear();
+}
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h
index 4cfc0cea81fb..15af232704ff 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -18,13 +18,16 @@
namespace llvm {
+class SIMachineFunctionInfo;
class SIRegisterInfo;
+class SISubtarget;
/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets. Its goal is to
/// maximize kernel occupancy (i.e. maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy : public GenericScheduler {
+ friend class GCNScheduleDAGMILive;
SUnit *pickNodeBidirectional(bool &IsTopNode);
@@ -35,18 +38,65 @@ class GCNMaxOccupancySchedStrategy : public GenericScheduler {
void initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop, const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
- int SGPRPressure, int VGPRPressure,
- int SGPRExcessLimit, int VGPRExcessLimit,
- int SGPRCriticalLimit, int VGPRCriticalLimit);
+ unsigned SGPRPressure, unsigned VGPRPressure);
- void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
- SchedBoundary *Zone, const SIRegisterInfo *SRI,
- unsigned SGPRPressure, unsigned VGPRPressure);
+ unsigned SGPRExcessLimit;
+ unsigned VGPRExcessLimit;
+ unsigned SGPRCriticalLimit;
+ unsigned VGPRCriticalLimit;
+
+ unsigned TargetOccupancy;
+
+ MachineFunction *MF;
public:
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
SUnit *pickNode(bool &IsTopNode) override;
+
+ void initialize(ScheduleDAGMI *DAG) override;
+
+ void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
+};
+
+class GCNScheduleDAGMILive : public ScheduleDAGMILive {
+
+ const SISubtarget &ST;
+
+ const SIMachineFunctionInfo &MFI;
+
+ // Occupancy target at the beginning of the function scheduling cycle.
+ unsigned StartingOccupancy;
+
+ // Minimal real occupancy recorded for the function.
+ unsigned MinOccupancy;
+
+ // Scheduling stage number.
+ unsigned Stage;
+
+ // Vector of regions recorded for later rescheduling.
+ SmallVector<std::pair<MachineBasicBlock::iterator,
+ MachineBasicBlock::iterator>, 32> Regions;
+
+ // Region live-ins.
+ DenseMap<unsigned, LaneBitmask> LiveIns;
+
+ // Number of live-ins to the current region, first SGPR then VGPR.
+ std::pair<unsigned, unsigned> LiveInPressure;
+
+ // Collect current region live-ins.
+ void discoverLiveIns();
+
+ // Return current region pressure. First value is SGPR number, second is VGPR.
+ std::pair<unsigned, unsigned> getRealRegPressure() const;
+
+public:
+ GCNScheduleDAGMILive(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S);
+
+ void schedule() override;
+
+ void finalizeSchedule() override;
};
} // End namespace llvm
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 7172a0aa7167..a817ff3cbaf0 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -113,7 +113,7 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
- O << " offset:";
+ O << ((OpNo == 0)? "offset:" : " offset:");
printU16ImmDecOperand(MI, OpNo, O);
}
}
@@ -375,6 +375,14 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
O << formatHex(static_cast<uint64_t>(Imm));
}
+void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint16_t Lo16 = static_cast<uint16_t>(Imm);
+ assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+ printImmediate16(Lo16, STI, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -489,6 +497,10 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ printImmediateV216(Op.getImm(), STI, O);
+ break;
case MCOI::OPERAND_UNKNOWN:
case MCOI::OPERAND_PCREL:
O << formatDec(Op.getImm());
@@ -531,13 +543,34 @@ void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
- if (InputModifiers & SISrcMods::NEG)
- O << '-';
+
+ // Use 'neg(...)' instead of '-' to avoid ambiguity.
+ // This is important for integer literals because
+ // -1 is not the same value as neg(1).
+ bool NegMnemo = false;
+
+ if (InputModifiers & SISrcMods::NEG) {
+ if (OpNo + 1 < MI->getNumOperands() &&
+ (InputModifiers & SISrcMods::ABS) == 0) {
+ const MCOperand &Op = MI->getOperand(OpNo + 1);
+ NegMnemo = Op.isImm() || Op.isFPImm();
+ }
+ if (NegMnemo) {
+ O << "neg(";
+ } else {
+ O << '-';
+ }
+ }
+
if (InputModifiers & SISrcMods::ABS)
O << '|';
printOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::ABS)
O << '|';
+
+ if (NegMnemo) {
+ O << ')';
+ }
}
void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
@@ -672,11 +705,19 @@ template <unsigned N>
void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- int EnIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::en);
+ unsigned Opc = MI->getOpcode();
+ int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
unsigned En = MI->getOperand(EnIdx).getImm();
- // FIXME: What do we do with compr? The meaning of en changes depending on if
- // compr is set.
+ int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
+
+ // If compr is set, print as src0, src0, src1, src1
+ if (MI->getOperand(ComprIdx).getImm()) {
+ if (N == 1 || N == 2)
+ --OpNo;
+ else if (N == 3)
+ OpNo -= 2;
+ }
if (En & (1 << N))
printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
@@ -730,6 +771,71 @@ void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
}
}
+static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
+ int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
+
+ for (int I = 0; I < NumOps; ++I) {
+ if (!!(Ops[I] & Mod) != DefaultValue)
+ return false;
+ }
+
+ return true;
+}
+
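+// Prints a packed-operand modifier such as " op_sel:[0,1,0]", one bit per
+// source operand; prints nothing when every operand has the default value.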
+static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
+ raw_ostream &O) {
+ unsigned Opc = MI->getOpcode();
+ int NumOps = 0;
+ int Ops[3];
+
+ for (int OpName : { AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers }) {
+ int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
+ if (Idx == -1)
+ break;
+
+ Ops[NumOps++] = MI->getOperand(Idx).getImm();
+ }
+
+ if (allOpsDefaultValue(Ops, NumOps, Mod))
+ return;
+
+ O << Name;
+ for (int I = 0; I < NumOps; ++I) {
+ if (I != 0)
+ O << ',';
+
+ O << !!(Ops[I] & Mod);
+ }
+
+ O << ']';
+}
+
+void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
+}
+
+void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
+}
+
+void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
+}
+
+void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1057,27 +1163,28 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- IsaVersion IV = getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
- decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
+ decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
bool NeedSpace = false;
- if (Vmcnt != getVmcntBitMask(IV)) {
+ if (Vmcnt != getVmcntBitMask(ISA)) {
O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true;
}
- if (Expcnt != getExpcntBitMask(IV)) {
+ if (Expcnt != getExpcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "expcnt(" << Expcnt << ')';
NeedSpace = true;
}
- if (Lgkmcnt != getLgkmcntBitMask(IV)) {
+ if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')';
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index a6d348ff0f12..c0b8e5c51089 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -90,6 +90,8 @@ private:
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
@@ -117,6 +119,14 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printOpSel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printOpSelHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNegLo(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNegHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpAttr(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt
index bbdd17737cf0..c54a13c4b4d8 100644
--- a/lib/Target/AMDGPU/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/LLVMBuild.txt
@@ -30,5 +30,5 @@ has_disassembler = 1
type = Library
name = AMDGPUCodeGen
parent = AMDGPU
-required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel
add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index ffb92aae599e..f3266fe82955 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -37,7 +37,7 @@ public:
bool &IsResolved) override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
@@ -131,7 +131,7 @@ void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm,
void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
if (!Value)
return; // Doesn't change encoding.
@@ -164,7 +164,20 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
}
bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- OW->WriteZeros(Count);
+ // If the count is not 4-byte aligned, we must be writing data into the text
+ // section (otherwise we have unaligned instructions, and thus have far
+ // bigger problems), so just write zeros instead.
+ OW->WriteZeros(Count % 4);
+
+ // We are properly aligned, so write NOPs as requested.
+ Count /= 4;
+
+ // FIXME: R600 support.
+ // s_nop 0
+ const uint32_t Encoded_S_NOP_0 = 0xbf800000;
+
+ for (uint64_t I = 0; I != Count; ++I)
+ OW->write32(Encoded_S_NOP_0);
return true;
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
new file mode 100644
index 000000000000..816e8c744b27
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
@@ -0,0 +1,422 @@
+//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata definitions and in-memory
+/// representations.
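+///
+/// Illustrative sketch only: for a hypothetical kernel "example_kernel" with a
+/// single i32 by-value argument, YAML serialized from the key names declared
+/// below would look roughly like this:
+///
+///   ---
+///   Version: [ 1, 0 ]
+///   Kernels:
+///     - Name:      example_kernel
+///       Args:
+///         - Size:      4
+///           Align:     4
+///           ValueKind: ByValue
+///           ValueType: I32
+///   ...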
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+
+#include <cstdint>
+#include <string>
+#include <system_error>
+#include <vector>
+
+namespace llvm {
+namespace AMDGPU {
+
+//===----------------------------------------------------------------------===//
+// Code Object Metadata.
+//===----------------------------------------------------------------------===//
+namespace CodeObject {
+
+/// \brief Code object metadata major version.
+constexpr uint32_t MetadataVersionMajor = 1;
+/// \brief Code object metadata minor version.
+constexpr uint32_t MetadataVersionMinor = 0;
+
+/// \brief Code object metadata beginning assembler directive.
+constexpr char MetadataAssemblerDirectiveBegin[] =
+ ".amdgpu_code_object_metadata";
+/// \brief Code object metadata ending assembler directive.
+constexpr char MetadataAssemblerDirectiveEnd[] =
+ ".end_amdgpu_code_object_metadata";
+
+/// \brief Access qualifiers.
+enum class AccessQualifier : uint8_t {
+ Default = 0,
+ ReadOnly = 1,
+ WriteOnly = 2,
+ ReadWrite = 3,
+ Unknown = 0xff
+};
+
+/// \brief Address space qualifiers.
+enum class AddressSpaceQualifier : uint8_t {
+ Private = 0,
+ Global = 1,
+ Constant = 2,
+ Local = 3,
+ Generic = 4,
+ Region = 5,
+ Unknown = 0xff
+};
+
+/// \brief Value kinds.
+enum class ValueKind : uint8_t {
+ ByValue = 0,
+ GlobalBuffer = 1,
+ DynamicSharedPointer = 2,
+ Sampler = 3,
+ Image = 4,
+ Pipe = 5,
+ Queue = 6,
+ HiddenGlobalOffsetX = 7,
+ HiddenGlobalOffsetY = 8,
+ HiddenGlobalOffsetZ = 9,
+ HiddenNone = 10,
+ HiddenPrintfBuffer = 11,
+ HiddenDefaultQueue = 12,
+ HiddenCompletionAction = 13,
+ Unknown = 0xff
+};
+
+/// \brief Value types.
+enum class ValueType : uint8_t {
+ Struct = 0,
+ I8 = 1,
+ U8 = 2,
+ I16 = 3,
+ U16 = 4,
+ F16 = 5,
+ I32 = 6,
+ U32 = 7,
+ F32 = 8,
+ I64 = 9,
+ U64 = 10,
+ F64 = 11,
+ Unknown = 0xff
+};
+
+//===----------------------------------------------------------------------===//
+// Kernel Metadata.
+//===----------------------------------------------------------------------===//
+namespace Kernel {
+
+//===----------------------------------------------------------------------===//
+// Kernel Attributes Metadata.
+//===----------------------------------------------------------------------===//
+namespace Attrs {
+
+namespace Key {
+/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize.
+constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
+/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint.
+constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
+/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint.
+constexpr char VecTypeHint[] = "VecTypeHint";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel attributes metadata.
+struct Metadata final {
+ /// \brief 'reqd_work_group_size' attribute. Optional.
+ std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>();
+ /// \brief 'work_group_size_hint' attribute. Optional.
+ std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
+ /// \brief 'vec_type_hint' attribute. Optional.
+ std::string mVecTypeHint = std::string();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \returns True if kernel attributes metadata is empty, false otherwise.
+ bool empty() const {
+ return mReqdWorkGroupSize.empty() &&
+ mWorkGroupSizeHint.empty() &&
+ mVecTypeHint.empty();
+ }
+
+ /// \returns True if kernel attributes metadata is not empty, false otherwise.
+ bool notEmpty() const {
+ return !empty();
+ }
+};
+
+} // end namespace Attrs
+
+//===----------------------------------------------------------------------===//
+// Kernel Argument Metadata.
+//===----------------------------------------------------------------------===//
+namespace Arg {
+
+namespace Key {
+/// \brief Key for Kernel::Arg::Metadata::mSize.
+constexpr char Size[] = "Size";
+/// \brief Key for Kernel::Arg::Metadata::mAlign.
+constexpr char Align[] = "Align";
+/// \brief Key for Kernel::Arg::Metadata::mValueKind.
+constexpr char ValueKind[] = "ValueKind";
+/// \brief Key for Kernel::Arg::Metadata::mValueType.
+constexpr char ValueType[] = "ValueType";
+/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
+constexpr char PointeeAlign[] = "PointeeAlign";
+/// \brief Key for Kernel::Arg::Metadata::mAccQual.
+constexpr char AccQual[] = "AccQual";
+/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
+constexpr char AddrSpaceQual[] = "AddrSpaceQual";
+/// \brief Key for Kernel::Arg::Metadata::mIsConst.
+constexpr char IsConst[] = "IsConst";
+/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
+constexpr char IsPipe[] = "IsPipe";
+/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
+constexpr char IsRestrict[] = "IsRestrict";
+/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
+constexpr char IsVolatile[] = "IsVolatile";
+/// \brief Key for Kernel::Arg::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Arg::Metadata::mTypeName.
+constexpr char TypeName[] = "TypeName";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel argument metadata.
+struct Metadata final {
+ /// \brief Size in bytes. Required.
+ uint32_t mSize = 0;
+ /// \brief Alignment in bytes. Required.
+ uint32_t mAlign = 0;
+ /// \brief Value kind. Required.
+ ValueKind mValueKind = ValueKind::Unknown;
+ /// \brief Value type. Required.
+ ValueType mValueType = ValueType::Unknown;
+ /// \brief Pointee alignment in bytes. Optional.
+ uint32_t mPointeeAlign = 0;
+ /// \brief Access qualifier. Optional.
+ AccessQualifier mAccQual = AccessQualifier::Unknown;
+ /// \brief Address space qualifier. Optional.
+ AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
+ /// \brief True if 'const' qualifier is specified. Optional.
+ bool mIsConst = false;
+ /// \brief True if 'pipe' qualifier is specified. Optional.
+ bool mIsPipe = false;
+ /// \brief True if 'restrict' qualifier is specified. Optional.
+ bool mIsRestrict = false;
+ /// \brief True if 'volatile' qualifier is specified. Optional.
+ bool mIsVolatile = false;
+ /// \brief Name. Optional.
+ std::string mName = std::string();
+ /// \brief Type name. Optional.
+ std::string mTypeName = std::string();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+};
+
+} // end namespace Arg
+
+//===----------------------------------------------------------------------===//
+// Kernel Code Properties Metadata.
+//===----------------------------------------------------------------------===//
+namespace CodeProps {
+
+namespace Key {
+/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
+constexpr char KernargSegmentSize[] = "KernargSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
+constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
+constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
+constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
+constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
+constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
+constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
+constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
+constexpr char WavefrontSize[] = "WavefrontSize";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel code properties metadata.
+struct Metadata final {
+ /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory
+ /// holds the values of the arguments to the kernel. Optional.
+ uint64_t mKernargSegmentSize = 0;
+ /// \brief Size in bytes of the group segment memory required by a workgroup.
+ /// This value does not include any dynamically allocated group segment memory
+ /// that may be added when the kernel is dispatched. Optional.
+ uint32_t mWorkgroupGroupSegmentSize = 0;
+ /// \brief Size in bytes of the private segment memory required by a workitem.
+ /// Private segment memory includes arg, spill and private segments. Optional.
+ uint32_t mWorkitemPrivateSegmentSize = 0;
+ /// \brief Total number of SGPRs used by a wavefront. Optional.
+ uint16_t mWavefrontNumSGPRs = 0;
+ /// \brief Total number of VGPRs used by a workitem. Optional.
+ uint16_t mWorkitemNumVGPRs = 0;
+ /// \brief Maximum byte alignment of variables used by the kernel in the
+ /// kernarg memory segment. Expressed as a power of two. Optional.
+ uint8_t mKernargSegmentAlign = 0;
+ /// \brief Maximum byte alignment of variables used by the kernel in the
+ /// group memory segment. Expressed as a power of two. Optional.
+ uint8_t mGroupSegmentAlign = 0;
+ /// \brief Maximum byte alignment of variables used by the kernel in the
+ /// private memory segment. Expressed as a power of two. Optional.
+ uint8_t mPrivateSegmentAlign = 0;
+ /// \brief Wavefront size. Expressed as a power of two. Optional.
+ uint8_t mWavefrontSize = 0;
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \returns True if kernel code properties metadata is empty, false
+ /// otherwise.
+ bool empty() const {
+ return !notEmpty();
+ }
+
+ /// \returns True if kernel code properties metadata is not empty, false
+ /// otherwise.
+ bool notEmpty() const {
+ return mKernargSegmentSize || mWorkgroupGroupSegmentSize ||
+ mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs ||
+ mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign ||
+ mPrivateSegmentAlign || mWavefrontSize;
+ }
+};
+
+} // end namespace CodeProps
+
+//===----------------------------------------------------------------------===//
+// Kernel Debug Properties Metadata.
+//===----------------------------------------------------------------------===//
+namespace DebugProps {
+
+namespace Key {
+/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion.
+constexpr char DebuggerABIVersion[] = "DebuggerABIVersion";
+/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs.
+constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs";
+/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR.
+constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR";
+/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR.
+constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR";
+/// \brief Key for
+/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR.
+constexpr char WavefrontPrivateSegmentOffsetSGPR[] =
+ "WavefrontPrivateSegmentOffsetSGPR";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel debug properties metadata.
+struct Metadata final {
+ /// \brief Debugger ABI version. Optional.
+ std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>();
+ /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if
+ /// mDebuggerABIVersion is not set. Optional.
+ uint16_t mReservedNumVGPRs = 0;
+ /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if
+ /// mDebuggerABIVersion is not set or mReservedNumVGPRs is 0. Optional.
+ uint16_t mReservedFirstVGPR = uint16_t(-1);
+ /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used
+ /// for the entire kernel execution. Must be uint16_t(-1) if
+ /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional.
+ uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1);
+ /// \brief Fixed SGPR used to hold the wave scratch offset for the entire
+ /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set
+ /// or SGPR is not used or not known. Optional.
+ uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1);
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \returns True if kernel debug properties metadata is empty, false
+ /// otherwise.
+ bool empty() const {
+ return !notEmpty();
+ }
+
+ /// \returns True if kernel debug properties metadata is not empty, false
+ /// otherwise.
+ bool notEmpty() const {
+ return !mDebuggerABIVersion.empty();
+ }
+};
+
+} // end namespace DebugProps
+
+namespace Key {
+/// \brief Key for Kernel::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Metadata::mLanguage.
+constexpr char Language[] = "Language";
+/// \brief Key for Kernel::Metadata::mLanguageVersion.
+constexpr char LanguageVersion[] = "LanguageVersion";
+/// \brief Key for Kernel::Metadata::mAttrs.
+constexpr char Attrs[] = "Attrs";
+/// \brief Key for Kernel::Metadata::mArgs.
+constexpr char Args[] = "Args";
+/// \brief Key for Kernel::Metadata::mCodeProps.
+constexpr char CodeProps[] = "CodeProps";
+/// \brief Key for Kernel::Metadata::mDebugProps.
+constexpr char DebugProps[] = "DebugProps";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel metadata.
+struct Metadata final {
+ /// \brief Name. Required.
+ std::string mName = std::string();
+ /// \brief Language. Optional.
+ std::string mLanguage = std::string();
+ /// \brief Language version. Optional.
+ std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
+ /// \brief Attributes metadata. Optional.
+ Attrs::Metadata mAttrs = Attrs::Metadata();
+ /// \brief Arguments metadata. Optional.
+ std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
+ /// \brief Code properties metadata. Optional.
+ CodeProps::Metadata mCodeProps = CodeProps::Metadata();
+ /// \brief Debug properties metadata. Optional.
+ DebugProps::Metadata mDebugProps = DebugProps::Metadata();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+};
+
+} // end namespace Kernel
+
+namespace Key {
+/// \brief Key for CodeObject::Metadata::mVersion.
+constexpr char Version[] = "Version";
+/// \brief Key for CodeObject::Metadata::mPrintf.
+constexpr char Printf[] = "Printf";
+/// \brief Key for CodeObject::Metadata::mKernels.
+constexpr char Kernels[] = "Kernels";
+} // end namespace Key
+
+/// \brief In-memory representation of code object metadata.
+struct Metadata final {
+ /// \brief Code object metadata version. Required.
+ std::vector<uint32_t> mVersion = std::vector<uint32_t>();
+ /// \brief Printf metadata. Optional.
+ std::vector<std::string> mPrintf = std::vector<std::string>();
+ /// \brief Kernels metadata. Optional.
+ std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \brief Converts \p YamlString to \p CodeObjectMetadata.
+ static std::error_code fromYamlString(std::string YamlString,
+ Metadata &CodeObjectMetadata);
+
+ /// \brief Converts \p CodeObjectMetadata to \p YamlString.
+ static std::error_code toYamlString(Metadata CodeObjectMetadata,
+ std::string &YamlString);
+};
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
new file mode 100644
index 000000000000..29a6ab9fbe93
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -0,0 +1,625 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::CodeObject;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+
+namespace llvm {
+
+static cl::opt<bool> DumpCodeObjectMetadata(
+ "amdgpu-dump-comd",
+ cl::desc("Dump AMDGPU Code Object Metadata"));
+static cl::opt<bool> VerifyCodeObjectMetadata(
+ "amdgpu-verify-comd",
+ cl::desc("Verify AMDGPU Code Object Metadata"));
+
+namespace yaml {
+
+template <>
+struct ScalarEnumerationTraits<AccessQualifier> {
+ static void enumeration(IO &YIO, AccessQualifier &EN) {
+ YIO.enumCase(EN, "Default", AccessQualifier::Default);
+ YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
+ YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
+ YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<AddressSpaceQualifier> {
+ static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
+ YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
+ YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
+ YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
+ YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
+ YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
+ YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<ValueKind> {
+ static void enumeration(IO &YIO, ValueKind &EN) {
+ YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
+ YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
+ YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
+ YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
+ YIO.enumCase(EN, "Image", ValueKind::Image);
+ YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
+ YIO.enumCase(EN, "Queue", ValueKind::Queue);
+ YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
+ YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
+ YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
+ YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
+ YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+ YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
+ YIO.enumCase(EN, "HiddenCompletionAction",
+ ValueKind::HiddenCompletionAction);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<ValueType> {
+ static void enumeration(IO &YIO, ValueType &EN) {
+ YIO.enumCase(EN, "Struct", ValueType::Struct);
+ YIO.enumCase(EN, "I8", ValueType::I8);
+ YIO.enumCase(EN, "U8", ValueType::U8);
+ YIO.enumCase(EN, "I16", ValueType::I16);
+ YIO.enumCase(EN, "U16", ValueType::U16);
+ YIO.enumCase(EN, "F16", ValueType::F16);
+ YIO.enumCase(EN, "I32", ValueType::I32);
+ YIO.enumCase(EN, "U32", ValueType::U32);
+ YIO.enumCase(EN, "F32", ValueType::F32);
+ YIO.enumCase(EN, "I64", ValueType::I64);
+ YIO.enumCase(EN, "U64", ValueType::U64);
+ YIO.enumCase(EN, "F64", ValueType::F64);
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::Attrs::Metadata> {
+ static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
+ YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
+ MD.mReqdWorkGroupSize, std::vector<uint32_t>());
+ YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
+ MD.mWorkGroupSizeHint, std::vector<uint32_t>());
+ YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
+ MD.mVecTypeHint, std::string());
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::Arg::Metadata> {
+ static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
+ YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
+ YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
+ YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind);
+ YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
+ YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
+ uint32_t(0));
+ YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
+ AccessQualifier::Unknown);
+ YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
+ AddressSpaceQualifier::Unknown);
+ YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
+ YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
+ YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
+ YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
+ YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
+ YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::CodeProps::Metadata> {
+ static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
+ YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
+ MD.mKernargSegmentSize, uint64_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
+ MD.mWorkgroupGroupSegmentSize, uint32_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
+ MD.mWorkitemPrivateSegmentSize, uint32_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
+ MD.mWavefrontNumSGPRs, uint16_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
+ MD.mWorkitemNumVGPRs, uint16_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
+ MD.mKernargSegmentAlign, uint8_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
+ MD.mGroupSegmentAlign, uint8_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
+ MD.mPrivateSegmentAlign, uint8_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
+ MD.mWavefrontSize, uint8_t(0));
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::DebugProps::Metadata> {
+ static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) {
+ YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
+ MD.mDebuggerABIVersion, std::vector<uint32_t>());
+ YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
+ MD.mReservedNumVGPRs, uint16_t(0));
+ YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
+ MD.mReservedFirstVGPR, uint16_t(-1));
+ YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
+ MD.mPrivateSegmentBufferSGPR, uint16_t(-1));
+ YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
+ MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::Metadata> {
+ static void mapping(IO &YIO, Kernel::Metadata &MD) {
+ YIO.mapRequired(Kernel::Key::Name, MD.mName);
+ YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
+ YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
+ std::vector<uint32_t>());
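+ // Only emit the nested maps when they carry data; when parsing
+ // (!outputting), always offer them so the optional keys can be read back.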
+ if (!MD.mAttrs.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
+ if (!MD.mArgs.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
+ if (!MD.mCodeProps.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
+ if (!MD.mDebugProps.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps);
+ }
+};
+
+template <>
+struct MappingTraits<CodeObject::Metadata> {
+ static void mapping(IO &YIO, CodeObject::Metadata &MD) {
+ YIO.mapRequired(Key::Version, MD.mVersion);
+ YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
+ if (!MD.mKernels.empty() || !YIO.outputting())
+ YIO.mapOptional(Key::Kernels, MD.mKernels);
+ }
+};
+
+} // end namespace yaml
+
+namespace AMDGPU {
+
+/* static */
+std::error_code CodeObject::Metadata::fromYamlString(
+ std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
+ yaml::Input YamlInput(YamlString);
+ YamlInput >> CodeObjectMetadata;
+ return YamlInput.error();
+}
+
+/* static */
+std::error_code CodeObject::Metadata::toYamlString(
+ CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
+ raw_string_ostream YamlStream(YamlString);
+ yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
+ YamlOutput << CodeObjectMetadata;
+ return std::error_code();
+}
+
+namespace CodeObject {
+
+void MetadataStreamer::dump(StringRef YamlString) const {
+ errs() << "AMDGPU Code Object Metadata:\n" << YamlString << '\n';
+}
+
+void MetadataStreamer::verify(StringRef YamlString) const {
+ errs() << "AMDGPU Code Object Metadata Parser Test: ";
+
+ CodeObject::Metadata FromYamlString;
+ if (Metadata::fromYamlString(YamlString, FromYamlString)) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ std::string ToYamlString;
+ if (Metadata::toYamlString(FromYamlString, ToYamlString)) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ errs() << (YamlString == ToYamlString ? "PASS" : "FAIL") << '\n';
+ if (YamlString != ToYamlString) {
+ errs() << "Original input: " << YamlString << '\n'
+ << "Produced output: " << ToYamlString << '\n';
+ }
+}
+
+AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+ if (AccQual.empty())
+ return AccessQualifier::Unknown;
+
+ return StringSwitch<AccessQualifier>(AccQual)
+ .Case("read_only", AccessQualifier::ReadOnly)
+ .Case("write_only", AccessQualifier::WriteOnly)
+ .Case("read_write", AccessQualifier::ReadWrite)
+ .Default(AccessQualifier::Default);
+}
+
+AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
+ unsigned AddressSpace) const {
+ if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
+ return AddressSpaceQualifier::Private;
+ if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
+ return AddressSpaceQualifier::Global;
+ if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
+ return AddressSpaceQualifier::Constant;
+ if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
+ return AddressSpaceQualifier::Local;
+ if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
+ return AddressSpaceQualifier::Generic;
+ if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
+ return AddressSpaceQualifier::Region;
+
+ llvm_unreachable("Unknown address space qualifier");
+}
+
+ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
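+ // BaseTypeName comes from the caller's kernel_arg_base_type metadata, e.g.
+ // "image2d_t" or "sampler_t"; a TypeQual containing "pipe" wins outright,
+ // unmatched pointer arguments fall back to DynamicSharedPointer (local
+ // address space) or GlobalBuffer, and everything else to ByValue.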
+ if (TypeQual.find("pipe") != StringRef::npos)
+ return ValueKind::Pipe;
+
+ return StringSwitch<ValueKind>(BaseTypeName)
+ .Case("sampler_t", ValueKind::Sampler)
+ .Case("queue_t", ValueKind::Queue)
+ .Cases("image1d_t",
+ "image1d_array_t",
+ "image1d_buffer_t",
+ "image2d_t" ,
+ "image2d_array_t",
+ "image2d_array_depth_t",
+ "image2d_array_msaa_t"
+ "image2d_array_msaa_depth_t"
+ "image2d_depth_t",
+ "image2d_msaa_t",
+ "image2d_msaa_depth_t",
+ "image3d_t", ValueKind::Image)
+ .Default(isa<PointerType>(Ty) ?
+ (Ty->getPointerAddressSpace() ==
+ AMDGPUASI.LOCAL_ADDRESS ?
+ ValueKind::DynamicSharedPointer :
+ ValueKind::GlobalBuffer) :
+ ValueKind::ByValue);
+}
+
+ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
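+ // TypeName is the OpenCL base type name; a leading 'u' (e.g. "uint") marks
+ // the unsigned integer variants.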
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ auto Signed = !TypeName.startswith("u");
+ switch (Ty->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? ValueType::I8 : ValueType::U8;
+ case 16:
+ return Signed ? ValueType::I16 : ValueType::U16;
+ case 32:
+ return Signed ? ValueType::I32 : ValueType::U32;
+ case 64:
+ return Signed ? ValueType::I64 : ValueType::U64;
+ default:
+ return ValueType::Struct;
+ }
+ }
+ case Type::HalfTyID:
+ return ValueType::F16;
+ case Type::FloatTyID:
+ return ValueType::F32;
+ case Type::DoubleTyID:
+ return ValueType::F64;
+ case Type::PointerTyID:
+ return getValueType(Ty->getPointerElementType(), TypeName);
+ case Type::VectorTyID:
+ return getValueType(Ty->getVectorElementType(), TypeName);
+ default:
+ return ValueType::Struct;
+ }
+}
+
+std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ if (!Signed)
+ return (Twine('u') + getTypeName(Ty, true)).str();
+
+ auto BitWidth = Ty->getIntegerBitWidth();
+ switch (BitWidth) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BitWidth)).str();
+ }
+ }
+ case Type::HalfTyID:
+ return "half";
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ return "double";
+ case Type::VectorTyID: {
+ auto VecTy = cast<VectorType>(Ty);
+ auto ElTy = VecTy->getElementType();
+ auto NumElements = VecTy->getVectorNumElements();
+ return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
+ }
+ default:
+ return "unknown";
+ }
+}
+
+std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
+ MDNode *Node) const {
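+ // Expects a node of exactly three i32 constants, e.g. the
+ // !{i32 256, i32 1, i32 1} form used by reqd_work_group_size and
+ // work_group_size_hint; any other shape yields an empty vector.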
+ std::vector<uint32_t> Dims;
+ if (Node->getNumOperands() != 3)
+ return Dims;
+
+ for (auto &Op : Node->operands())
+ Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
+ return Dims;
+}
+
+void MetadataStreamer::emitVersion() {
+ auto &Version = CodeObjectMetadata.mVersion;
+
+ Version.push_back(MetadataVersionMajor);
+ Version.push_back(MetadataVersionMinor);
+}
+
+void MetadataStreamer::emitPrintf(const Module &Mod) {
+ auto &Printf = CodeObjectMetadata.mPrintf;
+
+ auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
+ if (!Node)
+ return;
+
+ for (auto Op : Node->operands())
+ if (Op->getNumOperands())
+ Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
+}
+
+void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+ auto &Kernel = CodeObjectMetadata.mKernels.back();
+
+ // TODO: What about other languages?
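+ // "opencl.ocl.version" typically holds a pair of i32 constants, e.g.
+ // !{i32 2, i32 0} for OpenCL C 2.0; the two values become mLanguageVersion.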
+ auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+ if (!Node || !Node->getNumOperands())
+ return;
+ auto Op0 = Node->getOperand(0);
+ if (Op0->getNumOperands() <= 1)
+ return;
+
+ Kernel.mLanguage = "OpenCL C";
+ Kernel.mLanguageVersion.push_back(
+ mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
+ Kernel.mLanguageVersion.push_back(
+ mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
+}
+
+void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+ auto &Attrs = CodeObjectMetadata.mKernels.back().mAttrs;
+
+ if (auto Node = Func.getMetadata("reqd_work_group_size"))
+ Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("work_group_size_hint"))
+ Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("vec_type_hint")) {
+ Attrs.mVecTypeHint = getTypeName(
+ cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+ mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
+ }
+}
+
+void MetadataStreamer::emitKernelArgs(const Function &Func) {
+ for (auto &Arg : Func.args())
+ emitKernelArg(Arg);
+
+ // TODO: What about other languages?
+ if (!Func.getParent()->getNamedMetadata("opencl.ocl.version"))
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
+
+ if (!Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ return;
+
+ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
+ AMDGPUASI.GLOBAL_ADDRESS);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+}
+
+void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+ auto Func = Arg.getParent();
+ auto ArgNo = Arg.getArgNo();
+ const MDNode *Node;
+
+ StringRef TypeQual;
+ Node = Func->getMetadata("kernel_arg_type_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef BaseTypeName;
+ Node = Func->getMetadata("kernel_arg_base_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef AccQual;
+ if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
+ Arg.hasNoAliasAttr()) {
+ AccQual = "read_only";
+ } else {
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ }
+
+ StringRef Name;
+ Node = Func->getMetadata("kernel_arg_name");
+ if (Node && ArgNo < Node->getNumOperands())
+ Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef TypeName;
+ Node = Func->getMetadata("kernel_arg_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
+ getValueKind(Arg.getType(), TypeQual, BaseTypeName), TypeQual,
+ BaseTypeName, AccQual, Name, TypeName);
+}
+
+void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
+ ValueKind ValueKind, StringRef TypeQual,
+ StringRef BaseTypeName, StringRef AccQual,
+ StringRef Name, StringRef TypeName) {
+ CodeObjectMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
+ auto &Arg = CodeObjectMetadata.mKernels.back().mArgs.back();
+
+ Arg.mSize = DL.getTypeAllocSize(Ty);
+ Arg.mAlign = DL.getABITypeAlignment(Ty);
+ Arg.mValueKind = ValueKind;
+ Arg.mValueType = getValueType(Ty, BaseTypeName);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ auto ElTy = PtrTy->getElementType();
+ if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized())
+ Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy);
+ }
+
+ Arg.mAccQual = getAccessQualifier(AccQual);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty))
+ Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
+
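+ // TypeQual is the space-separated kernel_arg_type_qual string, e.g.
+ // "const volatile"; each recognized token sets the matching flag below.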
+ SmallVector<StringRef, 1> SplitTypeQuals;
+ TypeQual.split(SplitTypeQuals, " ", -1, false);
+ for (StringRef Key : SplitTypeQuals) {
+ auto P = StringSwitch<bool*>(Key)
+ .Case("const", &Arg.mIsConst)
+ .Case("pipe", &Arg.mIsPipe)
+ .Case("restrict", &Arg.mIsRestrict)
+ .Case("volatile", &Arg.mIsVolatile)
+ .Default(nullptr);
+ if (P)
+ *P = true;
+ }
+
+ Arg.mName = Name;
+ Arg.mTypeName = TypeName;
+}
+
+void MetadataStreamer::emitKernelCodeProps(
+ const amd_kernel_code_t &KernelCode) {
+ auto &CodeProps = CodeObjectMetadata.mKernels.back().mCodeProps;
+
+ CodeProps.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size;
+ CodeProps.mWorkgroupGroupSegmentSize =
+ KernelCode.workgroup_group_segment_byte_size;
+ CodeProps.mWorkitemPrivateSegmentSize =
+ KernelCode.workitem_private_segment_byte_size;
+ CodeProps.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count;
+ CodeProps.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count;
+ CodeProps.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment;
+ CodeProps.mGroupSegmentAlign = KernelCode.group_segment_alignment;
+ CodeProps.mPrivateSegmentAlign = KernelCode.private_segment_alignment;
+ CodeProps.mWavefrontSize = KernelCode.wavefront_size;
+}
+
+void MetadataStreamer::emitKernelDebugProps(
+ const amd_kernel_code_t &KernelCode) {
+ if (!(KernelCode.code_properties & AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED))
+ return;
+
+ auto &DebugProps = CodeObjectMetadata.mKernels.back().mDebugProps;
+
+ // FIXME: Need to pass down debugger ABI version through features. This is ok
+ // for now because we only have one version.
+ DebugProps.mDebuggerABIVersion.push_back(1);
+ DebugProps.mDebuggerABIVersion.push_back(0);
+ DebugProps.mReservedNumVGPRs = KernelCode.reserved_vgpr_count;
+ DebugProps.mReservedFirstVGPR = KernelCode.reserved_vgpr_first;
+ DebugProps.mPrivateSegmentBufferSGPR =
+ KernelCode.debug_private_segment_buffer_sgpr;
+ DebugProps.mWavefrontPrivateSegmentOffsetSGPR =
+ KernelCode.debug_wavefront_private_segment_offset_sgpr;
+}
+
+void MetadataStreamer::begin(const Module &Mod) {
+ AMDGPUASI = getAMDGPUAS(Mod);
+ emitVersion();
+ emitPrintf(Mod);
+}
+
+void MetadataStreamer::emitKernel(const Function &Func,
+ const amd_kernel_code_t &KernelCode) {
+ if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+ return;
+
+ CodeObjectMetadata.mKernels.push_back(Kernel::Metadata());
+ auto &Kernel = CodeObjectMetadata.mKernels.back();
+
+ Kernel.mName = Func.getName();
+ emitKernelLanguage(Func);
+ emitKernelAttrs(Func);
+ emitKernelArgs(Func);
+ emitKernelCodeProps(KernelCode);
+ emitKernelDebugProps(KernelCode);
+}
+
+ErrorOr<std::string> MetadataStreamer::toYamlString() {
+ std::string YamlString;
+ if (auto Error = Metadata::toYamlString(CodeObjectMetadata, YamlString))
+ return Error;
+
+ if (DumpCodeObjectMetadata)
+ dump(YamlString);
+ if (VerifyCodeObjectMetadata)
+ verify(YamlString);
+
+ return YamlString;
+}
+
+ErrorOr<std::string> MetadataStreamer::toYamlString(StringRef YamlString) {
+ if (auto Error = Metadata::fromYamlString(YamlString, CodeObjectMetadata))
+ return Error;
+
+ return toYamlString();
+}
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
new file mode 100644
index 000000000000..8d4c51763f63
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
@@ -0,0 +1,99 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
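+///
+/// Rough usage sketch against the interface declared below; the loop and the
+/// KernelCodeFor() helper are hypothetical, only the MetadataStreamer calls
+/// are real:
+///
+/// \code
+///   MetadataStreamer MDS;
+///   MDS.begin(Mod);
+///   for (const Function &Func : Mod)
+///     MDS.emitKernel(Func, KernelCodeFor(Func)); // non-kernels are skipped
+///   MDS.end();
+///   ErrorOr<std::string> Yaml = MDS.toYamlString();
+/// \endcode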
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+
+#include "AMDGPU.h"
+#include "AMDGPUCodeObjectMetadata.h"
+#include "AMDKernelCodeT.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorOr.h"
+
+namespace llvm {
+
+class Argument;
+class DataLayout;
+class Function;
+class MDNode;
+class Module;
+class Type;
+
+namespace AMDGPU {
+namespace CodeObject {
+
+class MetadataStreamer final {
+private:
+ Metadata CodeObjectMetadata;
+ AMDGPUAS AMDGPUASI;
+
+ void dump(StringRef YamlString) const;
+
+ void verify(StringRef YamlString) const;
+
+ AccessQualifier getAccessQualifier(StringRef AccQual) const;
+
+ AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
+
+ ValueKind getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const;
+
+ ValueType getValueType(Type *Ty, StringRef TypeName) const;
+
+ std::string getTypeName(Type *Ty, bool Signed) const;
+
+ std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
+
+ void emitVersion();
+
+ void emitPrintf(const Module &Mod);
+
+ void emitKernelLanguage(const Function &Func);
+
+ void emitKernelAttrs(const Function &Func);
+
+ void emitKernelArgs(const Function &Func);
+
+ void emitKernelArg(const Argument &Arg);
+
+ void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind,
+ StringRef TypeQual = "", StringRef BaseTypeName = "",
+ StringRef AccQual = "", StringRef Name = "",
+ StringRef TypeName = "");
+
+ void emitKernelCodeProps(const amd_kernel_code_t &KernelCode);
+
+ void emitKernelDebugProps(const amd_kernel_code_t &KernelCode);
+
+public:
+ MetadataStreamer() = default;
+ ~MetadataStreamer() = default;
+
+ void begin(const Module &Mod);
+
+ void end() {}
+
+ void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode);
+
+ ErrorOr<std::string> toYamlString();
+
+ ErrorOr<std::string> toYamlString(StringRef YamlString);
+};
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 1847d7a67328..073d19422e86 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -1,16 +1,20 @@
-//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==//
+//===- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// \file
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -19,20 +23,21 @@ namespace {
class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
public:
AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend);
+
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
};
-} // End anonymous namespace
+} // end anonymous namespace
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit,
bool HasRelocationAddend)
: MCELFObjectTargetWriter(Is64Bit,
ELF::ELFOSABI_AMDGPU_HSA,
ELF::EM_AMDGPU,
- HasRelocationAddend) { }
+ HasRelocationAddend) {}
unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
@@ -77,7 +82,6 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
llvm_unreachable("unhandled relocation type");
}
-
MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit,
bool HasRelocationAddend,
raw_pwrite_stream &OS) {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 548bad56e174..f80b5f3a6dba 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -54,11 +54,17 @@ MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
+#undef GET_REGINFO_ENUM
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_OPERAND_ENUM
#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_OPERAND_ENUM
+#undef GET_INSTRINFO_ENUM
+
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
+#undef GET_SUBTARGETINFO_ENUM
#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
deleted file mode 100644
index 95387ad1627c..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
+++ /dev/null
@@ -1,408 +0,0 @@
-//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Generates AMDGPU runtime metadata for YAML mapping.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPU.h"
-#include "AMDGPURuntimeMetadata.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <vector>
-#include "AMDGPURuntimeMD.h"
-
-using namespace llvm;
-using namespace ::AMDGPU::RuntimeMD;
-
-static cl::opt<bool>
-DumpRuntimeMD("amdgpu-dump-rtmd",
- cl::desc("Dump AMDGPU runtime metadata"));
-
-static cl::opt<bool>
-CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
- cl::desc("Check AMDGPU runtime metadata YAML parser"));
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
-
-namespace llvm {
-namespace yaml {
-
-template <> struct MappingTraits<KernelArg::Metadata> {
- static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
- YamlIO.mapRequired(KeyName::ArgSize, A.Size);
- YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
- YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
- YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
- YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
- YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
- YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
- YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
- YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
- YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
- YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
- YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
- YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
- }
- static const bool flow = true;
-};
-
-template <> struct MappingTraits<Kernel::Metadata> {
- static void mapping(IO &YamlIO, Kernel::Metadata &K) {
- YamlIO.mapRequired(KeyName::KernelName, K.Name);
- YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
- YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
- YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
- YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
- YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
- YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
- INVALID_KERNEL_INDEX);
- YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
- uint8_t(0));
- YamlIO.mapRequired(KeyName::Args, K.Args);
- }
- static const bool flow = true;
-};
-
-template <> struct MappingTraits<Program::Metadata> {
- static void mapping(IO &YamlIO, Program::Metadata &Prog) {
- YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
- YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
- YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
- }
- static const bool flow = true;
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-// Get a vector of three integer values from MDNode \p Node;
-static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
- assert(Node->getNumOperands() == 3);
- std::vector<uint32_t> V;
- for (const MDOperand &Op : Node->operands()) {
- const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
- V.push_back(CI->getZExtValue());
- }
- return V;
-}
-
-static std::string getOCLTypeName(Type *Ty, bool Signed) {
- switch (Ty->getTypeID()) {
- case Type::HalfTyID:
- return "half";
- case Type::FloatTyID:
- return "float";
- case Type::DoubleTyID:
- return "double";
- case Type::IntegerTyID: {
- if (!Signed)
- return (Twine('u') + getOCLTypeName(Ty, true)).str();
- unsigned BW = Ty->getIntegerBitWidth();
- switch (BW) {
- case 8:
- return "char";
- case 16:
- return "short";
- case 32:
- return "int";
- case 64:
- return "long";
- default:
- return (Twine('i') + Twine(BW)).str();
- }
- }
- case Type::VectorTyID: {
- VectorType *VecTy = cast<VectorType>(Ty);
- Type *EleTy = VecTy->getElementType();
- unsigned Size = VecTy->getVectorNumElements();
- return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
- }
- default:
- return "unknown";
- }
-}
-
-static KernelArg::ValueType getRuntimeMDValueType(
- Type *Ty, StringRef TypeName) {
- switch (Ty->getTypeID()) {
- case Type::HalfTyID:
- return KernelArg::F16;
- case Type::FloatTyID:
- return KernelArg::F32;
- case Type::DoubleTyID:
- return KernelArg::F64;
- case Type::IntegerTyID: {
- bool Signed = !TypeName.startswith("u");
- switch (Ty->getIntegerBitWidth()) {
- case 8:
- return Signed ? KernelArg::I8 : KernelArg::U8;
- case 16:
- return Signed ? KernelArg::I16 : KernelArg::U16;
- case 32:
- return Signed ? KernelArg::I32 : KernelArg::U32;
- case 64:
- return Signed ? KernelArg::I64 : KernelArg::U64;
- default:
- // Runtime does not recognize other integer types. Report as struct type.
- return KernelArg::Struct;
- }
- }
- case Type::VectorTyID:
- return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
- case Type::PointerTyID:
- return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
- default:
- return KernelArg::Struct;
- }
-}
-
-static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
- AMDGPUAS::AddressSpaces A) {
- switch (A) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- return KernelArg::Global;
- case AMDGPUAS::CONSTANT_ADDRESS:
- return KernelArg::Constant;
- case AMDGPUAS::LOCAL_ADDRESS:
- return KernelArg::Local;
- case AMDGPUAS::FLAT_ADDRESS:
- return KernelArg::Generic;
- case AMDGPUAS::REGION_ADDRESS:
- return KernelArg::Region;
- default:
- return KernelArg::Private;
- }
-}
-
-static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
- Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
- StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
- StringRef AccQual = "") {
-
- KernelArg::Metadata Arg;
-
- // Set ArgSize and ArgAlign.
- Arg.Size = DL.getTypeAllocSize(T);
- Arg.Align = DL.getABITypeAlignment(T);
- if (auto PT = dyn_cast<PointerType>(T)) {
- auto ET = PT->getElementType();
- if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
- Arg.PointeeAlign = DL.getABITypeAlignment(ET);
- }
-
- // Set ArgTypeName.
- Arg.TypeName = TypeName;
-
- // Set ArgName.
- Arg.Name = ArgName;
-
- // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe.
- SmallVector<StringRef, 1> SplitQ;
- TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
-
- for (StringRef KeyName : SplitQ) {
- auto *P = StringSwitch<uint8_t *>(KeyName)
- .Case("volatile", &Arg.IsVolatile)
- .Case("restrict", &Arg.IsRestrict)
- .Case("const", &Arg.IsConst)
- .Case("pipe", &Arg.IsPipe)
- .Default(nullptr);
- if (P)
- *P = 1;
- }
-
- // Set ArgKind.
- Arg.Kind = Kind;
-
- // Set ArgValueType.
- Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
-
- // Set ArgAccQual.
- if (!AccQual.empty()) {
- Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
- .Case("read_only", KernelArg::ReadOnly)
- .Case("write_only", KernelArg::WriteOnly)
- .Case("read_write", KernelArg::ReadWrite)
- .Default(KernelArg::AccNone);
- }
-
- // Set ArgAddrQual.
- if (auto *PT = dyn_cast<PointerType>(T)) {
- Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
- PT->getAddressSpace()));
- }
-
- return Arg;
-}
-
-static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
- Kernel::Metadata Kernel;
- Kernel.Name = F.getName();
- auto &M = *F.getParent();
-
- // Set Language and LanguageVersion.
- if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
- if (MD->getNumOperands() != 0) {
- auto Node = MD->getOperand(0);
- if (Node->getNumOperands() > 1) {
- Kernel.Language = "OpenCL C";
- uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
- ->getZExtValue();
- uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
- ->getZExtValue();
- Kernel.LanguageVersion.push_back(Major);
- Kernel.LanguageVersion.push_back(Minor);
- }
- }
- }
-
- const DataLayout &DL = F.getParent()->getDataLayout();
- for (auto &Arg : F.args()) {
- unsigned I = Arg.getArgNo();
- Type *T = Arg.getType();
- auto TypeName = dyn_cast<MDString>(F.getMetadata(
- "kernel_arg_type")->getOperand(I))->getString();
- auto BaseTypeName = cast<MDString>(F.getMetadata(
- "kernel_arg_base_type")->getOperand(I))->getString();
- StringRef ArgName;
- if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
- ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
- auto TypeQual = cast<MDString>(F.getMetadata(
- "kernel_arg_type_qual")->getOperand(I))->getString();
- auto AccQual = cast<MDString>(F.getMetadata(
- "kernel_arg_access_qual")->getOperand(I))->getString();
- KernelArg::Kind Kind;
- if (TypeQual.find("pipe") != StringRef::npos)
- Kind = KernelArg::Pipe;
- else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
- .Case("sampler_t", KernelArg::Sampler)
- .Case("queue_t", KernelArg::Queue)
- .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
- "image2d_t" , "image2d_array_t", KernelArg::Image)
- .Cases("image2d_depth_t", "image2d_array_depth_t",
- "image2d_msaa_t", "image2d_array_msaa_t",
- "image2d_msaa_depth_t", KernelArg::Image)
- .Cases("image2d_array_msaa_depth_t", "image3d_t",
- KernelArg::Image)
- .Default(isa<PointerType>(T) ?
- (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
- KernelArg::DynamicSharedPointer :
- KernelArg::GlobalBuffer) :
- KernelArg::ByValue);
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
- BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
- }
-
- // Emit hidden kernel arguments for OpenCL kernels.
- if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
- auto Int64T = Type::getInt64Ty(F.getContext());
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
- KernelArg::HiddenGlobalOffsetX));
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
- KernelArg::HiddenGlobalOffsetY));
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
- KernelArg::HiddenGlobalOffsetZ));
- if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
- auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
- KernelArg::Global);
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
- KernelArg::HiddenPrintfBuffer));
- }
- }
-
- // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
- if (auto RWGS = F.getMetadata("reqd_work_group_size"))
- Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);
-
- if (auto WGSH = F.getMetadata("work_group_size_hint"))
- Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);
-
- if (auto VTH = F.getMetadata("vec_type_hint"))
- Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
- VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
- VTH->getOperand(1))->getZExtValue());
-
- return Kernel;
-}
-
-Program::Metadata::Metadata(const std::string &YAML) {
- yaml::Input Input(YAML);
- Input >> *this;
-}
-
-std::string Program::Metadata::toYAML(void) {
- std::string Text;
- raw_string_ostream Stream(Text);
- yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */);
- Output << *this;
- return Stream.str();
-}
-
-Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
- return Program::Metadata(S);
-}
-
-// Check if the YAML string can be parsed.
-static void checkRuntimeMDYAMLString(const std::string &YAML) {
- auto P = Program::Metadata::fromYAML(YAML);
- auto S = P.toYAML();
- llvm::errs() << "AMDGPU runtime metadata parser test "
- << (YAML == S ? "passes" : "fails") << ".\n";
- if (YAML != S) {
- llvm::errs() << "First output: " << YAML << '\n'
- << "Second output: " << S << '\n';
- }
-}
-
-std::string llvm::getRuntimeMDYAMLString(Module &M) {
- Program::Metadata Prog;
- Prog.MDVersionSeq.push_back(MDVersion);
- Prog.MDVersionSeq.push_back(MDRevision);
-
- // Set PrintfInfo.
- if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
- for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
- auto Node = MD->getOperand(I);
- if (Node->getNumOperands() > 0)
- Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
- ->getString());
- }
- }
-
- // Set Kernels.
- for (auto &F: M.functions()) {
- if (!F.getMetadata("kernel_arg_type"))
- continue;
- Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
- }
-
- auto YAML = Prog.toYAML();
-
- if (DumpRuntimeMD)
- llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
-
- if (CheckRuntimeMDParser)
- checkRuntimeMDYAMLString(YAML);
-
- return YAML;
-}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
deleted file mode 100644
index a92fdd4bebc2..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
+++ /dev/null
@@ -1,26 +0,0 @@
-//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares functions for generating runtime metadata.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-
-#include <string>
-
-namespace llvm {
-class Module;
-
-// Get runtime metadata as YAML string.
-std::string getRuntimeMDYAMLString(Module &M);
-
-}
-#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 3392183d33c3..8dc863f723e2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -27,7 +27,6 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
-#include "AMDGPURuntimeMD.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -36,9 +35,27 @@ namespace llvm {
using namespace llvm;
using namespace llvm::AMDGPU;
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetStreamer
+//===----------------------------------------------------------------------===//
+
AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S) {}
+void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(const Module &Mod) {
+ CodeObjectMetadataStreamer.begin(Mod);
+}
+
+void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(
+ const Function &Func, const amd_kernel_code_t &KernelCode) {
+ CodeObjectMetadataStreamer.emitKernel(Func, KernelCode);
+}
+
+void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata() {
+ CodeObjectMetadataStreamer.end();
+ EmitCodeObjectMetadata(CodeObjectMetadataStreamer.toYamlString().get());
+}
+
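The three hooks added above split metadata emission into begin, per-kernel, and end phases, with YAML serialization happening only at the end. A minimal sketch of the expected call order from an emitter; the wrapper function and its kernel-list argument are illustrative, not code from this patch:

#include "AMDGPUTargetStreamer.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Module.h"
#include <utility>
using namespace llvm;

// Hypothetical driver showing the intended ordering of the new hooks.
static void emitAllCodeObjectMetadata(
    AMDGPUTargetStreamer &TS, const Module &M,
    ArrayRef<std::pair<const Function *, amd_kernel_code_t>> Kernels) {
  TS.EmitStartOfCodeObjectMetadata(M);                   // begin(): module-level state
  for (const auto &K : Kernels)
    TS.EmitKernelCodeObjectMetadata(*K.first, K.second); // one record per kernel
  TS.EmitEndOfCodeObjectMetadata();                      // end(), then emit the YAML
}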
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//
@@ -93,16 +110,16 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
}
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
- OS << "\t.amdgpu_runtime_metadata\n";
- OS << getRuntimeMDYAMLString(M);
- OS << "\n\t.end_amdgpu_runtime_metadata\n";
-}
+bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
+ auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
+ if (!VerifiedYamlString)
+ return false;
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) {
- OS << "\t.amdgpu_runtime_metadata";
- OS << Metadata;
- OS << "\t.end_amdgpu_runtime_metadata\n";
+ OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n';
+ OS << VerifiedYamlString.get();
+ OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n';
+
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -116,22 +133,21 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
-void
-AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ,
- PT_NOTE::NoteType Type,
- std::function<void(MCELFStreamer &)> EmitDesc) {
+void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
+ const MCExpr *DescSZ, ElfNote::NoteType Type,
+ function_ref<void(MCELFStreamer &)> EmitDesc) {
auto &S = getStreamer();
auto &Context = S.getContext();
- auto NameSZ = sizeof(PT_NOTE::NoteName);
+ auto NameSZ = sizeof(ElfNote::NoteName);
S.PushSection();
S.SwitchSection(Context.getELFSection(
- PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
+ ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
- S.EmitIntValue(Type, 4); // type
- S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
+ S.EmitIntValue(Type, 4); // type
+ S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
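For reference, the fields emitted above follow the standard ELF note layout. A hedged sketch of that layout as a plain struct (conceptual only; the real desc size can be a label-difference expression rather than a constant):

#include <cstdint>

// Conceptual layout of one AMDGPU note record as emitted by EmitAMDGPUNote.
struct AMDGPUNoteRecord {
  uint32_t NameSz; // sizeof(ElfNote::NoteName), the fixed name string with its NUL
  uint32_t DescSz; // size of the desc payload
  uint32_t Type;   // e.g. NT_AMDGPU_HSA_CODE_OBJECT_VERSION
  // char Name[NameSz]; padded to a 4-byte boundary
  // char Desc[DescSz]; padded to a 4-byte boundary
};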
@@ -144,7 +160,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
EmitAMDGPUNote(
MCConstantExpr::create(8, getContext()),
- PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
[&](MCELFStreamer &OS){
OS.EmitIntValue(Major, 4);
OS.EmitIntValue(Minor, 4);
@@ -160,14 +176,14 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
StringRef ArchName) {
uint16_t VendorNameSize = VendorName.size() + 1;
uint16_t ArchNameSize = ArchName.size() + 1;
-
+
unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
EmitAMDGPUNote(
MCConstantExpr::create(DescSZ, getContext()),
- PT_NOTE::NT_AMDGPU_HSA_ISA,
+ ElfNote::NT_AMDGPU_HSA_ISA,
[&](MCELFStreamer &OS) {
OS.EmitIntValue(VendorNameSize, 2);
OS.EmitIntValue(ArchNameSize, 2);
@@ -216,7 +232,11 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
Symbol->setBinding(ELF::STB_GLOBAL);
}
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
+bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
+ auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
+ if (!VerifiedYamlString)
+ return false;
+
// Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field.
auto &Context = getContext();
@@ -228,15 +248,13 @@ void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
EmitAMDGPUNote(
DescSZ,
- PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA,
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA,
[&](MCELFStreamer &OS) {
OS.EmitLabel(DescBegin);
- OS.EmitBytes(Metadata);
+ OS.EmitBytes(VerifiedYamlString.get());
OS.EmitLabel(DescEnd);
}
);
-}
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
- EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
+ return true;
}
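The DescSZ expression mentioned in the comment above is built from the DescBegin/DescEnd labels in context that this hunk elides. A sketch of the usual MC pattern for such a size expression, under the assumption that the patch follows it:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;

// Size of the desc field as an end-minus-begin label difference.
static const MCExpr *descSize(MCContext &Ctx, MCSymbol *DescBegin,
                              MCSymbol *DescEnd) {
  return MCBinaryExpr::createSub(MCSymbolRefExpr::create(DescEnd, Ctx),
                                 MCSymbolRefExpr::create(DescBegin, Ctx), Ctx);
}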
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index e2f20586903d..5c588bbded9c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -10,6 +10,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+#include "AMDGPUCodeObjectMetadataStreamer.h"
#include "AMDKernelCodeT.h"
#include "llvm/MC/MCStreamer.h"
@@ -26,6 +27,7 @@ class Type;
class AMDGPUTargetStreamer : public MCTargetStreamer {
protected:
+ AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer;
MCContext &getContext() const { return Streamer.getContext(); }
public:
@@ -46,12 +48,18 @@ public:
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
- virtual void EmitRuntimeMetadata(Module &M) = 0;
+ virtual void EmitStartOfCodeObjectMetadata(const Module &Mod);
- virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
+ virtual void EmitKernelCodeObjectMetadata(
+ const Function &Func, const amd_kernel_code_t &KernelCode);
+
+ virtual void EmitEndOfCodeObjectMetadata();
+
+ /// \returns True on success, false on failure.
+ virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0;
};
-class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
formatted_raw_ostream &OS;
public:
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
@@ -70,17 +78,16 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
- void EmitRuntimeMetadata(Module &M) override;
-
- void EmitRuntimeMetadata(StringRef Metadata) override;
+ /// \returns True on success, false on failure.
+ bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
-class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
- void EmitAMDGPUNote(const MCExpr* DescSize,
- AMDGPU::PT_NOTE::NoteType Type,
- std::function<void(MCELFStreamer &)> EmitDesc);
+ void EmitAMDGPUNote(const MCExpr *DescSize,
+ AMDGPU::ElfNote::NoteType Type,
+ function_ref<void(MCELFStreamer &)> EmitDesc);
public:
AMDGPUTargetELFStreamer(MCStreamer &S);
@@ -102,9 +109,8 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
- void EmitRuntimeMetadata(Module &M) override;
-
- void EmitRuntimeMetadata(StringRef Metadata) override;
+ /// \returns True on success, false on failure.
+ bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 8a6d00ce69ed..09e3efad10af 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -1,13 +1,12 @@
-
add_llvm_library(LLVMAMDGPUDesc
AMDGPUAsmBackend.cpp
+ AMDGPUCodeObjectMetadataStreamer.cpp
AMDGPUELFObjectWriter.cpp
AMDGPUELFStreamer.cpp
+ AMDGPUMCAsmInfo.cpp
AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
- AMDGPUMCAsmInfo.cpp
- AMDGPURuntimeMD.cpp
AMDGPUTargetStreamer.cpp
R600MCCodeEmitter.cpp
SIMCCodeEmitter.cpp
- )
+)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 0c5bb0648a16..bda0928036fd 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -220,13 +220,35 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
Imm = MO.getImm();
}
- switch (AMDGPU::getOperandSize(OpInfo)) {
- case 4:
+ switch (OpInfo.OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
- case 8:
+
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
- case 2:
+
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ // FIXME Is this correct? What do inline immediates do on SI for f16 src
+ // which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ uint16_t Lo16 = static_cast<uint16_t>(Imm);
+ assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+ uint32_t Encoding = getLit16Encoding(Lo16, STI);
+ assert(Encoding != 255 && "packed constants can only be inline immediates");
+ return Encoding;
+ }
default:
llvm_unreachable("invalid operand size");
}
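The new packed 16-bit cases only accept a 32-bit literal whose two halves are identical, and that half must itself be an inline immediate (getLit16Encoding returning 255 means it is not). A standalone sketch of the halves check; the inline-immediate part is left to the code above:

#include <cstdint>

// True when a packed v2i16/v2f16 constant repeats the same 16-bit value in both halves.
static bool hasIdenticalHalves(int64_t Imm) {
  uint16_t Lo16 = static_cast<uint16_t>(Imm);
  uint16_t Hi16 = static_cast<uint16_t>(Imm >> 16);
  return Lo16 == Hi16;
}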
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 46803e555711..a515eecc222a 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -475,106 +475,6 @@ class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : Pat <
sub0)
>;
-// ======= SI Image Intrinsics ================
-
-// Image load
-defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
-defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
-def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
-
-// Basic sample
-defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
-defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
-defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
-
-// Sample with comparison
-defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
-
-// Sample with offsets
-defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
-
-// Sample with comparison and offsets
-defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
-
-// Gather opcodes
-// Only the variants which make sense are defined.
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
-
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
-
// ======= amdgcn Image Intrinsics ==============
// Image load
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index 3c07cc76b9a1..0e4eda982139 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -187,3 +187,10 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
[FeatureISAVersion8_1_0]
>;
+def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
+ [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
+ [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
+>;
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 45b36d3d3ebb..811b905588b4 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -19,10 +19,26 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <new>
+#include <set>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -43,13 +59,12 @@ struct CFStack {
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
- unsigned CurrentEntries;
- unsigned CurrentSubEntries;
+ unsigned CurrentEntries = 0;
+ unsigned CurrentSubEntries = 0;
CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
- MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
- CurrentEntries(0), CurrentSubEntries(0) { }
+ MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
unsigned getLoopDepth();
bool branchStackContains(CFStack::StackItem);
@@ -198,9 +213,8 @@ void CFStack::popLoop() {
}
class R600ControlFlowFinalizer : public MachineFunctionPass {
-
private:
- typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
+ typedef std::pair<MachineInstr *, std::vector<MachineInstr *>> ClauseFile;
enum ControlFlowInstruction {
CF_TC,
@@ -217,10 +231,10 @@ private:
};
static char ID;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
+ const R600InstrInfo *TII = nullptr;
+ const R600RegisterInfo *TRI = nullptr;
unsigned MaxFetchInst;
- const R600Subtarget *ST;
+ const R600Subtarget *ST = nullptr;
bool IsTrivialInst(MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -355,7 +369,7 @@ private:
continue;
int64_t Imm = Src.second;
std::vector<MachineOperand *>::iterator It =
- find_if(Lits, [&](MachineOperand *val) {
+ llvm::find_if(Lits, [&](MachineOperand *val) {
return val->isImm() && (val->getImm() == Imm);
});
@@ -485,8 +499,7 @@ private:
}
public:
- R600ControlFlowFinalizer(TargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override {
ST = &MF.getSubtarget<R600Subtarget>();
@@ -501,7 +514,7 @@ public:
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
- std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
@@ -554,7 +567,7 @@ public:
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
.addImm(1);
- std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
LoopStack.push_back(std::move(Pair));
@@ -564,7 +577,7 @@ public:
}
case AMDGPU::ENDLOOP: {
CFStack.popLoop();
- std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ std::pair<unsigned, std::set<MachineInstr *>> Pair =
std::move(LoopStack.back());
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
@@ -693,7 +706,6 @@ char R600ControlFlowFinalizer::ID = 0;
} // end anonymous namespace
-
-llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
return new R600ControlFlowFinalizer(TM);
}
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 9a5db6ccc672..03fc1aff5ec1 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -17,26 +17,37 @@
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "AMDGPUSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
using namespace llvm;
namespace llvm {
+
void initializeR600EmitClauseMarkersPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
class R600EmitClauseMarkers : public MachineFunctionPass {
-
private:
- const R600InstrInfo *TII;
- int Address;
+ const R600InstrInfo *TII = nullptr;
+ int Address = 0;
unsigned OccupiedDwords(MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -118,7 +129,7 @@ private:
SubstituteKCacheBank(MachineInstr &MI,
std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
bool UpdateInstr = true) const {
- std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+ std::vector<std::pair<unsigned, unsigned>> UsedKCache;
if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
return true;
@@ -181,10 +192,11 @@ private:
bool canClauseLocalKillFitInClause(
unsigned AluInstCount,
- std::vector<std::pair<unsigned, unsigned> > KCacheBanks,
+ std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
MachineBasicBlock::iterator Def,
MachineBasicBlock::iterator BBEnd) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ //TODO: change this to defs?
for (MachineInstr::const_mop_iterator
MOI = Def->operands_begin(),
MOE = Def->operands_end(); MOI != MOE; ++MOI) {
@@ -207,15 +219,17 @@ private:
if (AluInstCount >= TII->getMaxAlusPerClause())
return false;
+ // TODO: Is this true? kill flag appears to work OK below
// Register kill flags have been cleared by the time we get to this
// pass, but it is safe to assume that all uses of this register
// occur in the same basic block as its definition, because
// it is illegal for the scheduler to schedule them in
// different blocks.
- if (UseI->findRegisterUseOperandIdx(MOI->getReg()))
+ if (UseI->readsRegister(MOI->getReg()))
LastUseCount = AluInstCount;
- if (UseI != Def && UseI->findRegisterDefOperandIdx(MOI->getReg()) != -1)
+ // Exit early if the current use kills the register
+ if (UseI != Def && UseI->killsRegister(MOI->getReg()))
break;
}
if (LastUseCount)
@@ -228,7 +242,7 @@ private:
MachineBasicBlock::iterator
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
- std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+ std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
bool PushBeforeModifier = false;
unsigned AluInstCount = 0;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
@@ -294,8 +308,8 @@ private:
public:
static char ID;
- R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) {
+ R600EmitClauseMarkers() : MachineFunctionPass(ID) {
initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
}
@@ -310,9 +324,11 @@ public:
if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
continue; // BB was already parsed
for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
- if (isALU(*I))
- I = MakeALUClause(MBB, I);
- else
+ if (isALU(*I)) {
+ auto next = MakeALUClause(MBB, I);
+ assert(next != I);
+ I = next;
+ } else
++I;
}
}
@@ -333,7 +349,6 @@ INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
"R600 Emit Clause Markters", false, false)
-llvm::FunctionPass *llvm::createR600EmitClauseMarkers() {
+FunctionPass *llvm::createR600EmitClauseMarkers() {
return new R600EmitClauseMarkers();
}
-
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 5813786abe01..1f01ad732e00 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -8,7 +8,43 @@
//==-----------------------------------------------------------------------===//
#include "R600FrameLowering.h"
+#include "AMDGPUSubtarget.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
R600FrameLowering::~R600FrameLowering() = default;
+
+/// \returns The number of registers allocated for \p FI.
+int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const R600RegisterInfo *RI
+ = MF.getSubtarget<R600Subtarget>().getRegisterInfo();
+
+ // Fill in FrameReg output argument.
+ FrameReg = RI->getFrameRegister(MF);
+
+ // Start the offset at 2 so we don't overwrite work group information.
+ // FIXME: We should only do this when the shader actually uses this
+ // information.
+ unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
+ int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
+
+ for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
+ OffsetBytes += MFI.getObjectSize(i);
+ // Each register holds 4 bytes, so we must always align the offset to at
+ // least 4 bytes, so that 2 frame objects won't share the same register.
+ OffsetBytes = alignTo(OffsetBytes, 4);
+ }
+
+ if (FI != -1)
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
+
+ return OffsetBytes / (getStackWidth(MF) * 4);
+}
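The value returned above is an offset in register slots, not bytes: each slot covers getStackWidth(MF) * 4 bytes, and the first two slots are reserved for work group information. A worked example, assuming a stack width of 1 and two 4-byte frame objects:

#include <cstdint>

// With StackWidth == 1 (4 bytes per slot) and 4-byte objects FI0, FI1:
//   base: OffsetBytes = 2 * (1 * 4) = 8        (reserved work-group slots)
//   FI0:  no earlier objects          ->  8 / 4 = slot 2
//   FI1:  skip FI0: 8 + 4 = 12 bytes  -> 12 / 4 = slot 3
static unsigned bytesToRegisterSlot(unsigned OffsetBytes, unsigned StackWidth) {
  return OffsetBytes / (StackWidth * 4);
}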
diff --git a/lib/Target/AMDGPU/R600FrameLowering.h b/lib/Target/AMDGPU/R600FrameLowering.h
index 874435f35ce4..142f70967eda 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/lib/Target/AMDGPU/R600FrameLowering.h
@@ -25,6 +25,8 @@ public:
MachineBasicBlock &MBB) const override {}
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override {}
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 77fee4356b65..3590a9b05e1d 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -221,6 +221,15 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, VT, Expand);
}
+ // LLVM will expand these to atomic_cmp_swap(0)
+ // and atomic_swap, respectively.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
setSchedulingPreference(Sched::Source);
setTargetDAGCombine(ISD::FP_ROUND);
@@ -266,7 +275,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
- NewMI.addOperand(MI.getOperand(i));
+ NewMI.add(MI.getOperand(i));
}
} else {
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
@@ -339,34 +348,34 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
.addImm(isEOP(I)); // Set End of program bit
break;
case AMDGPU::RAT_STORE_TYPED_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addImm(isEOP(I)); // Set End of program bit
break;
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI.getOperand(0));
+ .add(MI.getOperand(0));
break;
case AMDGPU::BRANCH_COND_f32: {
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(AMDGPU::PRED_SETNE)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
@@ -375,12 +384,12 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(AMDGPU::PRED_SETNE_INT)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
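The addOperand to add renames in this function are mechanical; both forward an existing MachineOperand onto a freshly built instruction. A small sketch of the pattern with the new spelling (the wrapper itself is illustrative):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

// Rebuild MI's operands on a new instruction described by Desc.
static MachineInstr *copyOperands(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  const MCInstrDesc &Desc, MachineInstr &MI) {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MI.getDebugLoc(), Desc);
  for (unsigned Idx = 0, E = MI.getNumOperands(); Idx != E; ++Idx)
    MIB.add(MI.getOperand(Idx)); // previously MIB.addOperand(...)
  return MIB;
}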
@@ -408,13 +417,13 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return BB;
unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3))
- .addOperand(MI.getOperand(4))
- .addOperand(MI.getOperand(5))
- .addOperand(MI.getOperand(6))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6))
.addImm(CfInst)
.addImm(EOP);
break;
@@ -490,8 +499,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
- switch(IntrinsicID) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ switch (IntrinsicID) {
case AMDGPUIntrinsic::r600_tex:
case AMDGPUIntrinsic::r600_texc: {
unsigned TextureOp;
@@ -552,7 +560,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
case Intrinsic::r600_implicitarg_ptr: {
- MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
@@ -599,6 +607,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_recipsqrt_clamped:
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
+ default:
+ return Op;
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
@@ -702,12 +712,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
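The AMDGPUAS::CONSTANT_ADDRESS style enumerators are replaced throughout this file by fields of AMDGPUASI, which appears to be a per-target table of numeric address spaces; its definition is outside this hunk. A hedged sketch of the resulting check style, with an illustrative struct name and only the fields this diff actually uses:

// Assumed shape of the address-space table consulted above; field names follow
// the usages in this diff, the real type is not shown in this hunk.
struct AMDGPUASExample {
  unsigned GLOBAL_ADDRESS;
  unsigned CONSTANT_ADDRESS;
  unsigned PRIVATE_ADDRESS;
  unsigned LOCAL_ADDRESS;
};

static bool isConstantAS(unsigned AS, const AMDGPUASExample &ASI) {
  return AS == ASI.CONSTANT_ADDRESS;
}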
@@ -864,7 +874,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUASI.CONSTANT_BUFFER_0);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -911,7 +921,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
if (VT == MVT::f32) {
DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
- SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+ SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
if (MinMax)
return MinMax;
}
@@ -1102,7 +1112,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
- assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+ assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
@@ -1200,9 +1210,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
- if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+ if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
// TODO: can the chain be replaced without creating a new store?
@@ -1225,7 +1236,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
- if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
@@ -1278,7 +1289,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
- if (AS != AMDGPUAS::PRIVATE_ADDRESS)
+ if (AS != AMDGPUASI.PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
@@ -1297,39 +1308,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// return (512 + (kc_bank << 12)
static int
-ConstantAddressBlock(unsigned AddressSpace) {
+ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) {
switch (AddressSpace) {
- case AMDGPUAS::CONSTANT_BUFFER_0:
+ case AMDGPUASI.CONSTANT_BUFFER_0:
return 512;
- case AMDGPUAS::CONSTANT_BUFFER_1:
+ case AMDGPUASI.CONSTANT_BUFFER_1:
return 512 + 4096;
- case AMDGPUAS::CONSTANT_BUFFER_2:
+ case AMDGPUASI.CONSTANT_BUFFER_2:
return 512 + 4096 * 2;
- case AMDGPUAS::CONSTANT_BUFFER_3:
+ case AMDGPUASI.CONSTANT_BUFFER_3:
return 512 + 4096 * 3;
- case AMDGPUAS::CONSTANT_BUFFER_4:
+ case AMDGPUASI.CONSTANT_BUFFER_4:
return 512 + 4096 * 4;
- case AMDGPUAS::CONSTANT_BUFFER_5:
+ case AMDGPUASI.CONSTANT_BUFFER_5:
return 512 + 4096 * 5;
- case AMDGPUAS::CONSTANT_BUFFER_6:
+ case AMDGPUASI.CONSTANT_BUFFER_6:
return 512 + 4096 * 6;
- case AMDGPUAS::CONSTANT_BUFFER_7:
+ case AMDGPUASI.CONSTANT_BUFFER_7:
return 512 + 4096 * 7;
- case AMDGPUAS::CONSTANT_BUFFER_8:
+ case AMDGPUASI.CONSTANT_BUFFER_8:
return 512 + 4096 * 8;
- case AMDGPUAS::CONSTANT_BUFFER_9:
+ case AMDGPUASI.CONSTANT_BUFFER_9:
return 512 + 4096 * 9;
- case AMDGPUAS::CONSTANT_BUFFER_10:
+ case AMDGPUASI.CONSTANT_BUFFER_10:
return 512 + 4096 * 10;
- case AMDGPUAS::CONSTANT_BUFFER_11:
+ case AMDGPUASI.CONSTANT_BUFFER_11:
return 512 + 4096 * 11;
- case AMDGPUAS::CONSTANT_BUFFER_12:
+ case AMDGPUASI.CONSTANT_BUFFER_12:
return 512 + 4096 * 12;
- case AMDGPUAS::CONSTANT_BUFFER_13:
+ case AMDGPUASI.CONSTANT_BUFFER_13:
return 512 + 4096 * 13;
- case AMDGPUAS::CONSTANT_BUFFER_14:
+ case AMDGPUASI.CONSTANT_BUFFER_14:
return 512 + 4096 * 14;
- case AMDGPUAS::CONSTANT_BUFFER_15:
+ case AMDGPUASI.CONSTANT_BUFFER_15:
return 512 + 4096 * 15;
default:
return -1;
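The switch above encodes the formula from the comment before it: constant buffer N (0 through 15) starts at 512 + N * 4096, and any other address space yields -1. A compact equivalent for illustration, taking the buffer index directly rather than the address space number:

// Closed form of the switch above.
static int constantBufferBase(int BufferIndex) {
  if (BufferIndex < 0 || BufferIndex > 15)
    return -1;
  return 512 + (BufferIndex << 12); // 4096 == 1 << 12
}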
@@ -1397,7 +1408,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
- if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
+ if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
@@ -1407,13 +1418,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
- if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
+ LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
- int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace(),
+ AMDGPUASI);
if (ConstantBlock > -1 &&
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
@@ -1445,7 +1457,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
+ AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
@@ -1481,7 +1493,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
- if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
return SDValue();
}
@@ -1535,7 +1547,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<ISD::InputArg, 8> LocalIns;
if (AMDGPU::isShader(CallConv)) {
- AnalyzeFormalArguments(CCInfo, Ins);
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
} else {
analyzeFormalArgumentsCompute(CCInfo, Ins);
}
@@ -1558,7 +1570,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUASI.CONSTANT_BUFFER_0);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index e88bd076718e..2422d57269eb 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -12,16 +12,34 @@
//
//===----------------------------------------------------------------------===//
-#include "R600InstrInfo.h"
#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
-#include "R600MachineFunctionInfo.h"
+#include "R600FrameLowering.h"
+#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -191,7 +209,7 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
usesVertexCache(MI.getOpcode())) ||
- usesTextureCache(MI.getOpcode());
+ usesTextureCache(MI.getOpcode());
}
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
@@ -321,7 +339,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
unsigned &ConstCount) const {
ConstCount = 0;
const std::pair<int, unsigned> DummyPair(-1, 0);
- std::vector<std::pair<int, unsigned> > Result;
+ std::vector<std::pair<int, unsigned>> Result;
unsigned i = 0;
for (const auto &Src : getSrcs(MI)) {
++i;
@@ -348,8 +366,8 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
return Result;
}
-static std::vector<std::pair<int, unsigned> >
-Swizzle(std::vector<std::pair<int, unsigned> > Src,
+static std::vector<std::pair<int, unsigned>>
+Swizzle(std::vector<std::pair<int, unsigned>> Src,
R600InstrInfo::BankSwizzle Swz) {
if (Src[0] == Src[1])
Src[1].first = -1;
@@ -404,14 +422,14 @@ static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ const std::vector<std::pair<int, unsigned>> &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
int Vector[4][3];
memset(Vector, -1, sizeof(Vector));
for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
- const std::vector<std::pair<int, unsigned> > &Srcs =
+ const std::vector<std::pair<int, unsigned>> &Srcs =
Swizzle(IGSrcs[i], Swz[i]);
for (unsigned j = 0; j < 3; j++) {
const std::pair<int, unsigned> &Src = Srcs[j];
@@ -473,9 +491,9 @@ NextPossibleSolution(
/// Enumerate all possible Swizzle sequence to find one that can meet all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ const std::vector<std::pair<int, unsigned>> &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
unsigned ValidUpTo = 0;
do {
@@ -490,7 +508,7 @@ bool R600InstrInfo::FindSwizzleForVectorSlot(
/// a const, and can't read a gpr at cycle 1 if they read 2 const.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
- const std::vector<std::pair<int, unsigned> > &TransOps,
+ const std::vector<std::pair<int, unsigned>> &TransOps,
unsigned ConstCount) {
// TransALU can't read 3 constants
if (ConstCount > 2)
@@ -516,7 +534,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
const {
//Todo : support shared src0 - src1 operand
- std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+ std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
ValidSwizzle.clear();
unsigned ConstCount;
BankSwizzle TransBS = ALU_VEC_012_SCL_210;
@@ -527,7 +545,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
- std::vector<std::pair<int, unsigned> > TransOps;
+ std::vector<std::pair<int, unsigned>> TransOps;
if (!isLastAluTrans)
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
@@ -556,7 +574,6 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
return false;
}
-
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
@@ -780,7 +797,7 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- assert(!BytesRemoved && "code size not handled");
+ assert(!BytesRemoved && "code size not handled");
// Note : we leave PRED* instructions there.
// They may be needed when predicating instructions.
@@ -852,7 +869,7 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
}
}
-bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
+bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
// XXX: KILL* instructions can be predicated, but they must be the last
// instruction in a clause, so this means any instructions after them cannot
// be predicated. Until we have proper support for instruction clauses in the
@@ -863,7 +880,7 @@ bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
} else if (MI.getOpcode() == AMDGPU::CF_ALU) {
// If the clause start in the middle of MBB then the MBB has more
// than a single clause, unable to predicate several clauses.
- if (MI.getParent()->begin() != MachineBasicBlock::iterator(MI))
+ if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
return false;
// TODO: We don't support KC merging atm
return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
@@ -874,10 +891,9 @@ bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
}
}
-
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
+ unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const{
return true;
@@ -896,7 +912,7 @@ R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
+ unsigned NumCycles,
BranchProbability Probability)
const {
return true;
@@ -908,7 +924,6 @@ R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
return false;
}
-
bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
@@ -948,7 +963,6 @@ bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
return isPredicateSetter(MI.getOpcode());
}
-
bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
int PIdx = MI.findFirstPredOperandIdx();
@@ -1067,7 +1081,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
-void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
const R600FrameLowering *TFL = ST.getFrameLowering();
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index a280052dbd4a..3b828006807e 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -177,12 +177,12 @@ public:
bool isPredicated(const MachineInstr &MI) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
- bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
BranchProbability Probability) const override;
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override ;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 9210e66b0fe7..bac557ba989e 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -316,7 +316,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
- (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
>;
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
@@ -326,8 +326,8 @@ def vtx_id3_load : LoadParamFrag<load>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
!isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
}]>;
@@ -339,7 +339,7 @@ def vtx_id1_load : LoadVtxId1 <load>;
class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
}]>;
@@ -1013,7 +1013,7 @@ multiclass CUBE_Common <bits<11> inst> {
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0),
"CUBE $dst $src0",
- [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
+ [(set v4f32:$dst, (int_r600_cube v4f32:$src0))],
VecALU
> {
let isPseudo = 1;
diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index d70f52e0f295..b7e62075244b 100644
--- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -34,15 +35,6 @@ namespace {
typedef std::pair<BasicBlock *, Value *> StackEntry;
typedef SmallVector<StackEntry, 16> StackVector;
-// Intrinsic names the control flow is annotated with
-static const char *const IfIntrinsic = "llvm.amdgcn.if";
-static const char *const ElseIntrinsic = "llvm.amdgcn.else";
-static const char *const BreakIntrinsic = "llvm.amdgcn.break";
-static const char *const IfBreakIntrinsic = "llvm.amdgcn.if.break";
-static const char *const ElseBreakIntrinsic = "llvm.amdgcn.else.break";
-static const char *const LoopIntrinsic = "llvm.amdgcn.loop";
-static const char *const EndCfIntrinsic = "llvm.amdgcn.end.cf";
-
class SIAnnotateControlFlow : public FunctionPass {
DivergenceAnalysis *DA;
@@ -56,13 +48,13 @@ class SIAnnotateControlFlow : public FunctionPass {
UndefValue *BoolUndef;
Constant *Int64Zero;
- Constant *If;
- Constant *Else;
- Constant *Break;
- Constant *IfBreak;
- Constant *ElseBreak;
- Constant *Loop;
- Constant *EndCf;
+ Function *If;
+ Function *Else;
+ Function *Break;
+ Function *IfBreak;
+ Function *ElseBreak;
+ Function *Loop;
+ Function *EndCf;
DominatorTree *DT;
StackVector Stack;
@@ -86,7 +78,8 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
Value *handleLoopCondition(Value *Cond, PHINode *Broken,
- llvm::Loop *L, BranchInst *Term);
+ llvm::Loop *L, BranchInst *Term,
+ SmallVectorImpl<WeakVH> &LoopPhiConditions);
void handleLoop(BranchInst *Term);
@@ -118,6 +111,7 @@ public:
INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
@@ -138,30 +132,13 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
BoolUndef = UndefValue::get(Boolean);
Int64Zero = ConstantInt::get(Int64, 0);
- If = M.getOrInsertFunction(
- IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr);
-
- Else = M.getOrInsertFunction(
- ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr);
-
- Break = M.getOrInsertFunction(
- BreakIntrinsic, Int64, Int64, (Type *)nullptr);
- cast<Function>(Break)->setDoesNotAccessMemory();
-
- IfBreak = M.getOrInsertFunction(
- IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
- cast<Function>(IfBreak)->setDoesNotAccessMemory();;
-
- ElseBreak = M.getOrInsertFunction(
- ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
- cast<Function>(ElseBreak)->setDoesNotAccessMemory();
-
- Loop = M.getOrInsertFunction(
- LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
-
- EndCf = M.getOrInsertFunction(
- EndCfIntrinsic, Void, Int64, (Type *)nullptr);
-
+ If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
+ Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
+ Break = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_break);
+ IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
+ ElseBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else_break);
+ Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
+ EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
return false;
}
@@ -208,15 +185,16 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
// \brief Erase "Phi" if it is not used any more
void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
- if (!Phi->hasNUsesOrMore(1))
- Phi->eraseFromParent();
+ if (llvm::RecursivelyDeleteDeadPHINode(Phi)) {
+ DEBUG(dbgs() << "Erased unused condition phi\n");
+ }
}
/// \brief Open a new "If" block
void SIAnnotateControlFlow::openIf(BranchInst *Term) {
- if (isUniform(Term)) {
+ if (isUniform(Term))
return;
- }
+
Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
@@ -233,8 +211,10 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
- llvm::Loop *L, BranchInst *Term) {
+Value *SIAnnotateControlFlow::handleLoopCondition(
+ Value *Cond, PHINode *Broken,
+ llvm::Loop *L, BranchInst *Term,
+ SmallVectorImpl<WeakVH> &LoopPhiConditions) {
// Only search through PHI nodes which are inside the loop. If we try this
// with PHI nodes that are outside of the loop, we end up inserting new PHI
@@ -245,7 +225,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
BasicBlock *Parent = Phi->getParent();
- PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
+ PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front());
Value *Ret = NewPhi;
// Handle all non-constant incoming values first
@@ -258,14 +238,14 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
}
Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken, L, Term);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken, L,
+ Term, LoopPhiConditions);
NewPhi->addIncoming(PhiArg, From);
}
BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
-
Value *Incoming = Phi->getIncomingValue(i);
if (Incoming != BoolTrue)
continue;
@@ -295,14 +275,17 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
continue;
}
}
+
TerminatorInst *Insert = From->getTerminator();
Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
NewPhi->setIncomingValue(i, PhiArg);
}
- eraseIfUnused(Phi);
+
+ LoopPhiConditions.push_back(WeakVH(Phi));
return Ret;
+ }
- } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
+ if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
Instruction *Insert;
if (L->contains(Inst)) {
@@ -310,46 +293,55 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
} else {
Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
}
+
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);
+ }
- // Insert IfBreak before TERM for constant COND.
- } else if (isa<ConstantInt>(Cond)) {
- Value *Args[] = { Cond, Broken };
- return CallInst::Create(IfBreak, Args, "", Term);
+ // Insert IfBreak in the loop header TERM for constant COND other than true.
+ if (isa<Constant>(Cond)) {
+ Instruction *Insert = Cond == BoolTrue ?
+ Term : L->getHeader()->getTerminator();
- } else {
- llvm_unreachable("Unhandled loop condition!");
+ Value *Args[] = { Cond, Broken };
+ return CallInst::Create(IfBreak, Args, "", Insert);
}
- return nullptr;
+
+ llvm_unreachable("Unhandled loop condition!");
}
/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
- if (isUniform(Term)) {
+ if (isUniform(Term))
return;
- }
BasicBlock *BB = Term->getParent();
llvm::Loop *L = LI->getLoopFor(BB);
if (!L)
return;
+
BasicBlock *Target = Term->getSuccessor(1);
- PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+ PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
+ SmallVector<WeakVH, 8> LoopPhiConditions;
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
+ Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions);
- for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
- PI != PE; ++PI) {
+ for (BasicBlock *Pred : predecessors(Target))
+ Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
+
+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
- Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
+ for (WeakVH Val : reverse(LoopPhiConditions)) {
+ if (PHINode *Cond = cast_or_null<PHINode>(Val))
+ eraseIfUnused(Cond);
}
- Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
push(Term->getSuccessor(0), Arg);
-}/// \brief Close the last opened control flow
+}
+
+/// \brief Close the last opened control flow
void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
llvm::Loop *L = LI->getLoopFor(BB);
@@ -359,59 +351,62 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
// We can't insert an EndCF call into a loop header, because it will
// get executed on every iteration of the loop, when it should be
// executed only once before the loop.
- SmallVector <BasicBlock*, 8> Latches;
+ SmallVector <BasicBlock *, 8> Latches;
L->getLoopLatches(Latches);
- std::vector<BasicBlock*> Preds;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!is_contained(Latches, *PI))
- Preds.push_back(*PI);
+ SmallVector<BasicBlock *, 2> Preds;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (!is_contained(Latches, Pred))
+ Preds.push_back(Pred);
}
+
BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
}
Value *Exec = popSaved();
- if (!isa<UndefValue>(Exec))
- CallInst::Create(EndCf, Exec, "", &*BB->getFirstInsertionPt());
+ Instruction *FirstInsertionPt = &*BB->getFirstInsertionPt();
+ if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt))
+ CallInst::Create(EndCf, Exec, "", FirstInsertionPt);
}
/// \brief Annotate the control flow with intrinsics so the backend can
/// recognize if/then/else and loops.
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
-
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DA = &getAnalysis<DivergenceAnalysis>();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
-
- BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
+ BasicBlock *BB = *I;
+ BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
if (!Term || Term->isUnconditional()) {
- if (isTopOfStack(*I))
- closeControlFlow(*I);
+ if (isTopOfStack(BB))
+ closeControlFlow(BB);
continue;
}
if (I.nodeVisited(Term->getSuccessor(1))) {
- if (isTopOfStack(*I))
- closeControlFlow(*I);
+ if (isTopOfStack(BB))
+ closeControlFlow(BB);
handleLoop(Term);
continue;
}
- if (isTopOfStack(*I)) {
+ if (isTopOfStack(BB)) {
PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
- if (Phi && Phi->getParent() == *I && isElse(Phi)) {
+ if (Phi && Phi->getParent() == BB && isElse(Phi)) {
insertElse(Term);
eraseIfUnused(Phi);
continue;
}
- closeControlFlow(*I);
+
+ closeControlFlow(BB);
}
+
openIf(Term);
}
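For illustration only (not part of the patch): the reworked handleLoopCondition no longer erases condition PHIs while it is still walking them; it records them as WeakVH and handleLoop cleans them up afterwards. A minimal sketch of that deferred-cleanup idiom, assuming the usual LLVM headers:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Erase collected condition PHIs once rewriting is finished. A WeakVH nulls
// itself if its value was already deleted as a side effect of erasing an
// earlier PHI, so stale pointers are never dereferenced.
static void eraseDeadConditionPhis(SmallVectorImpl<WeakVH> &Conditions) {
  for (WeakVH &VH : reverse(Conditions))
    if (PHINode *Phi = cast_or_null<PHINode>(VH))
      RecursivelyDeleteDeadPHINode(Phi); // deletes the PHI only if it is dead
}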
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index ff4e32147184..3dd372b32866 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -36,6 +36,7 @@ enum : uint64_t {
// TODO: Should this be split into VOP3 a and b?
VOP3 = 1 << 10,
+ VOP3P = 1 << 12,
VINTRP = 1 << 13,
SDWA = 1 << 14,
@@ -65,8 +66,8 @@ enum : uint64_t {
SOPK_ZEXT = UINT64_C(1) << 38,
SCALAR_STORE = UINT64_C(1) << 39,
FIXED_SIZE = UINT64_C(1) << 40,
- VOPAsmPrefer32Bit = UINT64_C(1) << 41
-
+ VOPAsmPrefer32Bit = UINT64_C(1) << 41,
+ HasFPClamp = UINT64_C(1) << 42
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -102,12 +103,14 @@ namespace AMDGPU {
OPERAND_REG_INLINE_C_FP16,
OPERAND_REG_INLINE_C_FP32,
OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_V2FP16,
+ OPERAND_REG_INLINE_C_V2INT16,
OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
@@ -125,9 +128,12 @@ namespace AMDGPU {
// NEG and SEXT share same bit-mask because they can't be set simultaneously.
namespace SISrcMods {
enum {
- NEG = 1 << 0, // Floating-point negate modifier
- ABS = 1 << 1, // Floating-point absolute modifier
- SEXT = 1 << 0 // Integer sign-extend modifier
+ NEG = 1 << 0, // Floating-point negate modifier
+ ABS = 1 << 1, // Floating-point absolute modifier
+ SEXT = 1 << 0, // Integer sign-extend modifier
+ NEG_HI = ABS, // Floating-point negate high packed component modifier.
+ OP_SEL_0 = 1 << 2,
+ OP_SEL_1 = 1 << 3
};
}
@@ -242,6 +248,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_LDS_ALLOC = 6,
ID_IB_STS = 7,
ID_SYMBOLIC_LAST_ = 8,
+ ID_MEM_BASES = 15,
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
@@ -251,14 +258,20 @@ enum Offset { // Offset, (5) [10:6]
OFFSET_DEFAULT_ = 0,
OFFSET_SHIFT_ = 6,
OFFSET_WIDTH_ = 5,
- OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_)
+ OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
+
+ OFFSET_SRC_SHARED_BASE = 16,
+ OFFSET_SRC_PRIVATE_BASE = 0
};
enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
WIDTH_M1_DEFAULT_ = 31,
WIDTH_M1_SHIFT_ = 11,
WIDTH_M1_WIDTH_ = 5,
- WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_)
+ WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_),
+
+ WIDTH_M1_SRC_SHARED_BASE = 15,
+ WIDTH_M1_SRC_PRIVATE_BASE = 15
};
} // namespace Hwreg
@@ -300,6 +313,9 @@ enum DstUnused {
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
+#define S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6)
+#define G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1)
+#define C_00B84C_TRAP_HANDLER 0xFFFFFFBF
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
@@ -387,7 +403,6 @@ enum DstUnused {
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
-
} // End namespace llvm
#endif
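Illustration only, not from the patch: the Hwreg enums above describe the immediate encoding as id in bits [5:0], offset in bits [10:6], and width-1 in bits [15:11]. A standalone check of how the newly added ID_MEM_BASES / OFFSET_SRC_SHARED_BASE / WIDTH_M1_SRC_SHARED_BASE values would pack under those shifts (packHwreg is a hypothetical helper, not part of the header):

#include <cstdint>
#include <cstdio>

// Pack a hwreg immediate using the shift amounts from SIDefines.h:
// ID_SHIFT_ = 0, OFFSET_SHIFT_ = 6, WIDTH_M1_SHIFT_ = 11.
static uint32_t packHwreg(uint32_t Id, uint32_t Offset, uint32_t WidthM1) {
  return (Id << 0) | (Offset << 6) | (WidthM1 << 11);
}

int main() {
  // Reading src_shared_base: id = ID_MEM_BASES (15), offset = 16, width-1 = 15.
  std::printf("0x%04x\n", packHwreg(15, 16, 15)); // prints 0x7c0f
  return 0;
}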
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 6a422e70fe1f..f9d258f44a62 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -65,6 +65,7 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
@@ -198,6 +199,10 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
if (!CopyUse.isCopy())
return false;
+ // It is illegal to have vreg inputs to a physreg defining reg_sequence.
+ if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
+ return false;
+
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
@@ -234,8 +239,9 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
- BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
- .addOperand(MI.getOperand(I));
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
+ TmpReg)
+ .add(MI.getOperand(I));
MI.getOperand(I).setReg(TmpReg);
}
@@ -326,6 +332,27 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
return true;
}
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI) {
+ DenseSet<MachineBasicBlock*> Visited;
+ SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
+ MBB->pred_end());
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *mbb = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Visited.insert(mbb).second)
+ continue;
+ if (hasTerminatorThatModifiesExec(*mbb, *TRI))
+ return true;
+
+ Worklist.insert(Worklist.end(), mbb->pred_begin(), mbb->pred_end());
+ }
+
+ return false;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -382,8 +409,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
- MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
- if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+ if (!predsHasDivergentTerminator(MBB0, TRI) &&
+ !predsHasDivergentTerminator(MBB1, TRI)) {
DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
diff --git a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
new file mode 100644
index 000000000000..3d3121788b5e
--- /dev/null
+++ b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -0,0 +1,72 @@
+//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Add implicit use of exec to vector register copies.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-vgpr-copies"
+
+namespace {
+
+class SIFixVGPRCopies : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixVGPRCopies() : MachineFunctionPass(ID) {
+ initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "SI Fix VGPR copies"; }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false)
+
+char SIFixVGPRCopies::ID = 0;
+
+char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID;
+
+bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) {
+ MI.addOperand(MF,
+ MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ DEBUG(dbgs() << "Add exec use to " << MI);
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index a5c0d4923d6b..d63414735b95 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -12,6 +12,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -66,6 +67,7 @@ public:
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
+ const SISubtarget *ST;
void foldOperand(MachineOperand &OpToFold,
MachineInstr *UseMI,
@@ -75,6 +77,12 @@ public:
void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+ const MachineOperand *isClamp(const MachineInstr &MI) const;
+ bool tryFoldClamp(MachineInstr &MI);
+
+ std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
+ bool tryFoldOMod(MachineInstr &MI);
+
public:
SIFoldOperands() : MachineFunctionPass(ID) {
initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
@@ -131,27 +139,6 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
return new SIFoldOperands();
}
-static bool isSafeToFold(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO: {
- // If there are additional implicit register operands, this may be used for
- // register indexing so the source register operand isn't simply copied.
- unsigned NumOps = MI.getDesc().getNumOperands() +
- MI.getDesc().getNumImplicitUses();
-
- return MI.getNumOperands() == NumOps;
- }
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::COPY:
- return true;
- default:
- return false;
- }
-}
-
static bool updateOperand(FoldCandidate &Fold,
const TargetRegisterInfo &TRI) {
MachineInstr *MI = Fold.UseMI;
@@ -359,8 +346,6 @@ void SIFoldOperands::foldOperand(
const TargetRegisterClass *FoldRC =
TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
- APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
- OpToFold.getImm());
// Split 64-bit constants into 32-bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
@@ -370,21 +355,25 @@ void SIFoldOperands::foldOperand(
MRI->getRegClass(UseReg) :
TRI->getPhysRegClass(UseReg);
- assert(Imm.getBitWidth() == 64);
-
if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
return;
+ APInt Imm(64, OpToFold.getImm());
if (UseOp.getSubReg() == AMDGPU::sub0) {
Imm = Imm.getLoBits(32);
} else {
assert(UseOp.getSubReg() == AMDGPU::sub1);
Imm = Imm.getHiBits(32);
}
+
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+ return;
}
- MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+
+
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
}
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
@@ -581,6 +570,32 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
return false;
}
+// Try to fold an instruction into a simpler one
+static bool tryFoldInst(const SIInstrInfo *TII,
+ MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
+ Opc == AMDGPU::V_CNDMASK_B32_e64 ||
+ Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
+ const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
+ if (Src1->isIdenticalTo(*Src0)) {
+ DEBUG(dbgs() << "Folded " << *MI << " into ");
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx != -1)
+ MI->RemoveOperand(Src2Idx);
+ MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
+ mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
+ : getMovOpc(false)));
+ DEBUG(dbgs() << *MI << '\n');
+ return true;
+ }
+ }
+
+ return false;
+}
+
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
// We need to mutate the operands of new mov instructions to add implicit
@@ -682,20 +697,213 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
}
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+ tryFoldInst(TII, Fold.UseMI);
}
}
}
+const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
+ unsigned Op = MI.getOpcode();
+ switch (Op) {
+ case AMDGPU::V_MAX_F32_e64:
+ case AMDGPU::V_MAX_F16_e64:
+ case AMDGPU::V_MAX_F64: {
+ if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
+ return nullptr;
+
+ // Make sure sources are identical.
+ const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() ||
+ Src0->getSubReg() != AMDGPU::NoSubRegister)
+ return nullptr;
+
+ // Can't fold up if we have modifiers.
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return nullptr;
+ return Src0;
+ }
+ default:
+ return nullptr;
+ }
+}
+
+// We obviously have multiple uses in a clamp since the register is used twice
+// in the same instruction.
+static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
+ int Count = 0;
+ for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
+ I != E; ++I) {
+ if (++Count > 1)
+ return false;
+ }
+
+ return true;
+}
+
+bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
+ const MachineOperand *ClampSrc = isClamp(MI);
+ if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
+ if (!TII->hasFPClamp(*Def))
+ return false;
+ MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
+ if (!DefClamp)
+ return false;
+
+ DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def << '\n');
+
+ // Clamp is applied after omod, so it is OK if omod is set.
+ DefClamp->setImm(1);
+ MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
+static int getOModValue(unsigned Opc, int64_t Val) {
+ switch (Opc) {
+ case AMDGPU::V_MUL_F32_e64: {
+ switch (static_cast<uint32_t>(Val)) {
+ case 0x3f000000: // 0.5
+ return SIOutMods::DIV2;
+ case 0x40000000: // 2.0
+ return SIOutMods::MUL2;
+ case 0x40800000: // 4.0
+ return SIOutMods::MUL4;
+ default:
+ return SIOutMods::NONE;
+ }
+ }
+ case AMDGPU::V_MUL_F16_e64: {
+ switch (static_cast<uint16_t>(Val)) {
+ case 0x3800: // 0.5
+ return SIOutMods::DIV2;
+ case 0x4000: // 2.0
+ return SIOutMods::MUL2;
+ case 0x4400: // 4.0
+ return SIOutMods::MUL4;
+ default:
+ return SIOutMods::NONE;
+ }
+ }
+ default:
+ llvm_unreachable("invalid mul opcode");
+ }
+}
+
+// FIXME: Does this really not support denormals with f16?
+// FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
+// handled, so will anything other than that break?
+std::pair<const MachineOperand *, int>
+SIFoldOperands::isOMod(const MachineInstr &MI) const {
+ unsigned Op = MI.getOpcode();
+ switch (Op) {
+ case AMDGPU::V_MUL_F32_e64:
+ case AMDGPU::V_MUL_F16_e64: {
+ // If output denormals are enabled, omod is ignored.
+ if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
+ (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ const MachineOperand *RegOp = nullptr;
+ const MachineOperand *ImmOp = nullptr;
+ const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src0->isImm()) {
+ ImmOp = Src0;
+ RegOp = Src1;
+ } else if (Src1->isImm()) {
+ ImmOp = Src1;
+ RegOp = Src0;
+ } else
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ int OMod = getOModValue(Op, ImmOp->getImm());
+ if (OMod == SIOutMods::NONE ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ return std::make_pair(RegOp, OMod);
+ }
+ case AMDGPU::V_ADD_F32_e64:
+ case AMDGPU::V_ADD_F16_e64: {
+ // If output denormals are enabled, omod is ignored.
+ if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
+ (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
+ const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+
+ if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
+ Src0->getSubReg() == Src1->getSubReg() &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return std::make_pair(Src0, SIOutMods::MUL2);
+
+ return std::make_pair(nullptr, SIOutMods::NONE);
+ }
+ default:
+ return std::make_pair(nullptr, SIOutMods::NONE);
+ }
+}
+
+// FIXME: Does this need to check IEEE bit on function?
+bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
+ const MachineOperand *RegOp;
+ int OMod;
+ std::tie(RegOp, OMod) = isOMod(MI);
+ if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
+ RegOp->getSubReg() != AMDGPU::NoSubRegister ||
+ !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
+ MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
+ if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
+ return false;
+
+ // Clamp is applied after omod. If the source already has clamp set, don't
+ // fold it.
+ if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
+ return false;
+
+ DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
+
+ DefOMod->setImm(OMod);
+ MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
-
MRI = &MF.getRegInfo();
- TII = ST.getInstrInfo();
+ ST = &MF.getSubtarget<SISubtarget>();
+ TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // omod is ignored by hardware if IEEE bit is enabled. omod also does not
+ // correctly handle signed zeros.
+ //
+ // TODO: Check nsz on instructions when fast math flags are preserved to MI
+ // level.
+ bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -705,8 +913,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (!isSafeToFold(MI))
+ tryFoldInst(TII, &MI);
+
+ if (!TII->isFoldableCopy(MI)) {
+ if (IsIEEEMode || !tryFoldOMod(MI))
+ tryFoldClamp(MI);
continue;
+ }
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
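As a side note (illustrative, not part of the patch): the literals matched in getOModValue above are simply the IEEE-754 bit patterns of the three scale factors the output modifier supports. A quick standalone check for the f32 case (the f16 constants follow the same idea):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret a float's bits so they can be compared against the constants
// used in getOModValue.
static uint32_t bitsOf(float F) {
  uint32_t U;
  std::memcpy(&U, &F, sizeof(U));
  return U;
}

int main() {
  std::printf("0.5f -> 0x%08x\n", bitsOf(0.5f)); // 0x3f000000 -> SIOutMods::DIV2
  std::printf("2.0f -> 0x%08x\n", bitsOf(2.0f)); // 0x40000000 -> SIOutMods::MUL2
  std::printf("4.0f -> 0x%08x\n", bitsOf(4.0f)); // 0x40800000 -> SIOutMods::MUL4
  return 0;
}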
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 0b5715515880..abe6af9a6d3f 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -21,22 +21,24 @@
using namespace llvm;
-static ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF,
- const SIRegisterInfo *TRI) {
+static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
+ const MachineFunction &MF) {
return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
- TRI->getMaxNumSGPRs(MF) / 4);
+ ST.getMaxNumSGPRs(MF) / 4);
}
-static ArrayRef<MCPhysReg> getAllSGPRs(const MachineFunction &MF,
- const SIRegisterInfo *TRI) {
+static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
+ const MachineFunction &MF) {
return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
- TRI->getMaxNumSGPRs(MF));
+ ST.getMaxNumSGPRs(MF));
}
-void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
- const SIRegisterInfo* TRI,
+void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo* TRI = &TII->getRegisterInfo();
+
// We don't need this if we only have spills since there is no user facing
// scratch.
@@ -59,16 +61,28 @@ void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
MRI.addLiveIn(FlatScratchInitReg);
MBB.addLiveIn(FlatScratchInitReg);
- // Copy the size in bytes.
- unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
- .addReg(FlatScrInitHi, RegState::Kill);
-
unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+ unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ // Do a 64-bit pointer add.
+ if (ST.flatScratchIsPointer()) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
+ .addReg(FlatScrInitHi)
+ .addImm(0);
+
+ return;
+ }
+
+ // Copy the size in bytes.
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
+ .addReg(FlatScrInitHi, RegState::Kill);
+
// Add wave offset in bytes to private base offset.
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
@@ -111,16 +125,15 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
- ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(MF, TRI);
+ ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
- // Skip the last 2 elements because the last one is reserved for VCC, and
- // this is the 2nd to last element already.
+ // Skip the last N reserved elements because they should have already been
+ // reserved for VCC etc.
for (MCPhysReg Reg : AllSGPR128s) {
// Pick the first unallocated one. Make sure we don't clobber the other
// reserved input we needed.
if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
- //assert(MRI.isAllocatable(Reg));
MRI.replaceRegWith(ScratchRsrcReg, Reg);
MFI->setScratchRSrcReg(Reg);
return Reg;
@@ -143,10 +156,9 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
MachineRegisterInfo &MRI = MF.getRegInfo();
-
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
- ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(MF, TRI);
+ ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
return ScratchWaveOffsetReg;
@@ -190,6 +202,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
// specified.
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ auto AMDGPUASI = ST.getAMDGPUAS();
if (ST.debuggerEmitPrologue())
emitDebuggerPrologue(MF, MBB);
@@ -229,7 +242,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// emitted after frame indices are eliminated.
if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
- emitFlatScratchInit(TII, TRI, MF, MBB);
+ emitFlatScratchInit(ST, MF, MBB);
// We need to insert initialization of the scratch resource descriptor.
unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
@@ -328,7 +341,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
PointerType *PtrTy =
PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
- AMDGPUAS::CONSTANT_ADDRESS);
+ AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |
@@ -371,6 +384,24 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
}
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+ I != E; ++I) {
+ if (!MFI.isDeadObjectIndex(I))
+ return false;
+ }
+
+ return true;
+}
+
+int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+
+ FrameReg = RI->getFrameRegister(MF);
+ return MF.getFrameInfo().getObjectOffset(FI);
+}
+
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
@@ -379,15 +410,66 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (!MFI.hasStackObjects())
return;
- bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ bool AllSGPRSpilledToVGPRs = false;
+
+ if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
+ AllSGPRSpilledToVGPRs = true;
+
+ // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+ // are spilled to VGPRs, in which case we can eliminate the stack usage.
+ //
+ // XXX - This operates under the assumption that only other SGPR spills are
+ // users of the frame index. I'm not 100% sure this is correct. The
+ // StackColoring pass has a comment saying a future improvement would be to
+  // merge allocas with spill slots, but for now, according to
+ // MachineFrameInfo isSpillSlot can't alias any other object.
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator Next;
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+ MachineInstr &MI = *I;
+ Next = std::next(I);
+
+ if (TII->isSGPRSpill(MI)) {
+ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+ if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+ (void)Spilled;
+ assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+ } else
+ AllSGPRSpilledToVGPRs = false;
+ }
+ }
+ }
- assert((RS || !MayNeedScavengingEmergencySlot) &&
- "RegScavenger required if spilling");
+ FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
+ }
- if (MayNeedScavengingEmergencySlot) {
- int ScavengeFI = MFI.CreateStackObject(
- AMDGPU::SGPR_32RegClass.getSize(),
- AMDGPU::SGPR_32RegClass.getAlignment(), false);
+ // FIXME: The other checks should be redundant with allStackObjectsAreDead,
+ // but currently hasNonSpillStackObjects is set only from source
+ // allocas. Stack temps produced from legalization are not counted currently.
+ if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
+ !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+ assert(RS && "RegScavenger required if spilling");
+
+ // We force this to be at offset 0 so no user object ever has 0 as an
+ // address, so we may use 0 as an invalid pointer value. This is because
+ // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
+  // is required to be in address space 0, we are forced to accept this for
+ // now. Ideally we could have the stack in another address space with 0 as a
+ // valid pointer, and -1 as the null value.
+ //
+ // This will also waste additional space when user stack objects require > 4
+ // byte alignment.
+ //
+ // The main cost here is losing the offset for addressing modes. However
+ // this also ensures we shouldn't need a register for the offset when
+ // emergency scavenging.
+ int ScavengeFI = MFI.CreateFixedObject(
+ AMDGPU::SGPR_32RegClass.getSize(), 0, false);
RS->addScavengingFrameIndex(ScavengeFI);
}
}
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index 7657b4e03864..1bfc08093da2 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -30,14 +30,15 @@ public:
MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
private:
- void emitFlatScratchInit(const SIInstrInfo *TII,
- const SIRegisterInfo* TRI,
+ void emitFlatScratchInit(const SISubtarget &ST,
MachineFunction &MF,
MachineBasicBlock &MBB) const;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index b98f9f400ee7..7268131396dc 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,26 +15,70 @@
#ifdef _MSC_VER
// Provide M_PI.
#define _USE_MATH_DEFINES
-#include <cmath>
#endif
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -43,7 +87,6 @@ static cl::opt<bool> EnableVGPRIndexMode(
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
-
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -84,6 +127,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
}
+ if (Subtarget->hasVOP3PInsts()) {
+ addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass);
+ addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass);
+ }
+
computeRegisterProperties(STI.getRegisterInfo());
// We need to custom lower vector stores from local memory
@@ -110,7 +158,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
-
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
@@ -142,10 +189,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
+
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
+
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
@@ -153,9 +207,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::UADDO, MVT::i32, Legal);
+ setOperationAction(ISD::USUBO, MVT::i32, Legal);
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) {
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
+ MVT::v2i64, MVT::v2f64}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -202,6 +260,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+ // Avoid stack access for these.
+ // TODO: Generalize to more vector types.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
@@ -222,7 +287,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- setOperationAction(ISD::TRAP, MVT::Other, Custom);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -303,6 +369,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::f16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Custom);
// F16 - VOP2 Actions.
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
@@ -317,6 +384,85 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAD, MVT::f16, Legal);
}
+ if (Subtarget->hasVOP3PInsts()) {
+ for (MVT VT : {MVT::v2i16, MVT::v2f16}) {
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+ switch (Op) {
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::BUILD_VECTOR:
+ case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ break;
+ case ISD::CONCAT_VECTORS:
+ setOperationAction(Op, VT, Custom);
+ break;
+ default:
+ setOperationAction(Op, VT, Expand);
+ break;
+ }
+ }
+ }
+
+ // XXX - Do these do anything? Vector constants turn into build_vector.
+ setOperationAction(ISD::Constant, MVT::v2i16, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::STORE, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::STORE, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2f16, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::LOAD, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2f16, MVT::i32);
+
+ setOperationAction(ISD::AND, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::AND, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::OR, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::OR, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::XOR, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::XOR, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::SELECT, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::SELECT, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::v2f16, MVT::i32);
+
+ setOperationAction(ISD::ADD, MVT::v2i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i16, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i16, Legal);
+ setOperationAction(ISD::SHL, MVT::v2i16, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i16, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i16, Legal);
+ setOperationAction(ISD::SMIN, MVT::v2i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v2i16, Legal);
+ setOperationAction(ISD::SMAX, MVT::v2i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v2i16, Legal);
+
+ setOperationAction(ISD::FADD, MVT::v2f16, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v2f16, Legal);
+
+ // This isn't really legal, but this avoids the legalizer unrolling it (and
+ // allows matching fneg (fabs x) patterns)
+ setOperationAction(ISD::FABS, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
+ }
+
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
@@ -332,6 +478,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FCANONICALIZE);
+ setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
// All memory operations. Some folding on the pointer operand is done to help
// matching the constant offsets in the addressing modes.
@@ -364,30 +512,49 @@ const SISubtarget *SITargetLowering::getSubtarget() const {
// TargetLowering queries
//===----------------------------------------------------------------------===//
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+ EVT) const {
+ // SI has some legal vector types, but no legal vector operations. Say no
+ // shuffles are legal in order to prefer scalarizing some vector operations.
+ return false;
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
unsigned IntrID) const {
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_atomic_dec: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align = 0;
- Info.vol = false;
+
+ const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
+ Info.vol = !Vol || !Vol->isNullValue();
Info.readMem = true;
Info.writeMem = true;
return true;
+ }
default:
return false;
}
}
-bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
- EVT) const {
- // SI has some legal vector types, but no legal vector operations. Say no
- // shuffles are legal in order to prefer scalarizing some vector operations.
- return false;
+bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
+ SmallVectorImpl<Value*> &Ops,
+ Type *&AccessTy) const {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec: {
+ Value *Ptr = II->getArgOperand(0);
+ AccessTy = II->getType();
+ Ops.push_back(Ptr);
+ return true;
+ }
+ default:
+ return false;
+ }
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
@@ -438,8 +605,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Assume the we will use FLAT for all global memory accesses
// on VI.
@@ -454,8 +620,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
return isLegalMUBUFAddressingMode(AM);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
+ } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -478,7 +643,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() == SISubtarget::VOLCANIC_ISLANDS) {
+ } else if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
@@ -492,13 +657,11 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
+ } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
-
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
+ AS == AMDGPUASI.REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -513,17 +676,15 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- }
- case AMDGPUAS::FLAT_ADDRESS:
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
+ } else if (AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
return isLegalFlatAddressingMode(AM);
-
- default:
+ } else {
llvm_unreachable("unhandled address space");
}
}
@@ -544,8 +705,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return false;
}
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS) {
+ if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUASI.REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
@@ -560,8 +721,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (!Subtarget->hasUnalignedScratchAccess() &&
- (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
+ (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
return false;
}
@@ -569,7 +730,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// If we have an uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
- *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ?
+ *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ?
(Align % 4 == 0) : true;
}
@@ -609,15 +770,16 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
+ return AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
+ return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
+ isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
@@ -631,7 +793,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
- if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
+ if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
return true;
return isNoopAddrSpaceCast(SrcAS, DestAS);
@@ -639,18 +801,8 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
- const Value *Ptr = MemNode->getMemOperand()->getValue();
- // UndefValue means this is a load of a kernel input. These are uniform.
- // Sometimes LDS instructions have constant pointers.
- // If Ptr is null, then that means this mem operand contains a
- // PseudoSourceValue like GOT.
- if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
- isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
- return true;
-
- const Instruction *I = dyn_cast<Instruction>(Ptr);
- return I && I->getMetadata("amdgpu.uniform");
+ return AMDGPU::isUniformMMO(MemNode->getMemOperand());
}
TargetLoweringBase::LegalizeTypeAction
@@ -693,40 +845,28 @@ bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
return TargetLowering::isTypeDesirableForOp(Op, VT);
}
-SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
- const SDLoc &SL, SDValue Chain,
- unsigned Offset) const {
+SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
+ const SDLoc &SL,
+ SDValue Chain,
+ uint64_t Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
- unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF,
+ SIRegisterInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
DAG.getConstant(Offset, SL, PtrVT));
}
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- const SDLoc &SL, SDValue Chain,
- unsigned Offset, bool Signed,
+SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Val,
+ bool Signed,
const ISD::InputArg *Arg) const {
- const DataLayout &DL = DAG.getDataLayout();
- Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
-
- unsigned Align = DL.getABITypeAlignment(Ty);
-
- SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
- SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
- MachineMemOperand::MONonTemporal |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
-
- SDValue Val = Load;
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
@@ -740,373 +880,434 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
else
Val = DAG.getZExtOrTrunc(Val, SL, VT);
- return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
+ return Val;
}
-SDValue SITargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-
- MachineFunction &MF = DAG.getMachineFunction();
- FunctionType *FType = MF.getFunction()->getFunctionType();
- SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+SDValue SITargetLowering::lowerKernargMemParameter(
+ SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Chain,
+ uint64_t Offset, bool Signed,
+ const ISD::InputArg *Arg) const {
+ const DataLayout &DL = DAG.getDataLayout();
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
- if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
- const Function *Fn = MF.getFunction();
- DiagnosticInfoUnsupported NoGraphicsHSA(
- *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
- DAG.getContext()->diagnose(NoGraphicsHSA);
- return DAG.getEntryNode();
- }
+ unsigned Align = DL.getABITypeAlignment(Ty);
- // Create stack objects that are used for emitting debugger prologue if
- // "amdgpu-debugger-emit-prologue" attribute was specified.
- if (ST.debuggerEmitPrologue())
- createDebuggerPrologueStackObjects(MF);
+ SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
+ SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
+ MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
- SmallVector<ISD::InputArg, 16> Splits;
- BitVector Skipped(Ins.size());
+ SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
+ return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
+}
- for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
- const ISD::InputArg &Arg = Ins[i];
+static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
+ CallingConv::ID CallConv,
+ ArrayRef<ISD::InputArg> Ins,
+ BitVector &Skipped,
+ FunctionType *FType,
+ SIMachineFunctionInfo *Info) {
+ for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
+ const ISD::InputArg &Arg = Ins[I];
- // First check if it's a PS input addr
+ // First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS && !Arg.Flags.isInReg() &&
!Arg.Flags.isByVal() && PSInputNum <= 15) {
if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
- // We can safely skip PS inputs
- Skipped.set(i);
+ // We can safely skip PS inputs.
+ Skipped.set(I);
++PSInputNum;
continue;
}
Info->markPSInputAllocated(PSInputNum);
if (Arg.Used)
- Info->PSInputEna |= 1 << PSInputNum;
+ Info->markPSInputEnabled(PSInputNum);
++PSInputNum;
}
- if (AMDGPU::isShader(CallConv)) {
- // Second split vertices into their elements
- if (Arg.VT.isVector()) {
- ISD::InputArg NewArg = Arg;
- NewArg.Flags.setSplit();
- NewArg.VT = Arg.VT.getVectorElementType();
-
- // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
- // three or five element vertex only needs three or five registers,
- // NOT four or eight.
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- Splits.push_back(NewArg);
- NewArg.PartOffset += NewArg.VT.getStoreSize();
- }
- } else {
- Splits.push_back(Arg);
+ // Second split vertices into their elements.
+ if (Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+ // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+ // three or five element vertex only needs three or five registers,
+ // NOT four or eight.
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned J = 0; J != NumElements; ++J) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
}
+ } else {
+ Splits.push_back(Arg);
}
}
+}
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+// Allocate special inputs passed in VGPRs.
+static void allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ if (Info.hasWorkItemIDX()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
- // At least one interpolation mode must be enabled or else the GPU will hang.
- //
- // Check PSInputAddr instead of PSInputEna. The idea is that if the user set
- // PSInputAddr, the user wants to enable some bits after the compilation
- // based on run-time states. Since we can't know what the final PSInputEna
- // will look like, so we shouldn't do anything here and the user should take
- // responsibility for the correct programming.
- //
- // Otherwise, the following restrictions apply:
- // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
- // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
- // enabled too.
- if (CallConv == CallingConv::AMDGPU_PS &&
- ((Info->getPSInputAddr() & 0x7F) == 0 ||
- ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11)))) {
- CCInfo.AllocateReg(AMDGPU::VGPR0);
- CCInfo.AllocateReg(AMDGPU::VGPR1);
- Info->markPSInputAllocated(0);
- Info->PSInputEna |= 1;
- }
-
- if (!AMDGPU::isShader(CallConv)) {
- assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
- } else {
- assert(!Info->hasDispatchPtr() &&
- !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
- !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
- !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
+ if (Info.hasWorkItemIDY()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
}
- if (Info->hasPrivateMemoryInputPtr()) {
- unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
- MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+ if (Info.hasWorkItemIDZ()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+}
+
+// Allocate special inputs passed in user SGPRs.
+static void allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ if (Info.hasPrivateMemoryInputPtr()) {
+ unsigned PrivateMemoryPtrReg = Info.addPrivateMemoryPtr(TRI);
+ MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(PrivateMemoryPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info->hasPrivateSegmentBuffer()) {
- unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
- MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ if (Info.hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info->hasDispatchPtr()) {
- unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+ if (Info.hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info->hasQueuePtr()) {
- unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
+ if (Info.hasQueuePtr()) {
+ unsigned QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info->hasKernargSegmentPtr()) {
- unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+ if (Info.hasKernargSegmentPtr()) {
+ unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI);
MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(InputPtrReg);
}
- if (Info->hasDispatchID()) {
- unsigned DispatchIDReg = Info->addDispatchID(*TRI);
+ if (Info.hasDispatchID()) {
+ unsigned DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info->hasFlatScratchInit()) {
- unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+ if (Info.hasFlatScratchInit()) {
+ unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
- if (!AMDGPU::isShader(CallConv))
- analyzeFormalArgumentsCompute(CCInfo, Ins);
- else
- AnalyzeFormalArguments(CCInfo, Splits);
-
- SmallVector<SDValue, 16> Chains;
-
- for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
-
- const ISD::InputArg &Arg = Ins[i];
- if (Skipped[i]) {
- InVals.push_back(DAG.getUNDEF(Arg.VT));
- continue;
- }
-
- CCValAssign &VA = ArgLocs[ArgIdx++];
- MVT VT = VA.getLocVT();
-
- if (VA.isMemLoc()) {
- VT = Ins[i].VT;
- EVT MemVT = VA.getLocVT();
- const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
- VA.getLocMemOffset();
- // The first 36 bytes of the input buffer contains information about
- // thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
- Offset, Ins[i].Flags.isSExt(),
- &Ins[i]);
- Chains.push_back(Arg.getValue(1));
-
- auto *ParamTy =
- dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
- if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
- ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- // On SI local pointers are just offsets into LDS, so they are always
- // less than 16-bits. On CI and newer they could potentially be
- // real pointers, so we can't guarantee their size.
- Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
- DAG.getValueType(MVT::i16));
- }
-
- InVals.push_back(Arg);
- Info->setABIArgOffset(Offset + MemVT.getStoreSize());
- continue;
- }
- assert(VA.isRegLoc() && "Parameter must be in a register!");
-
- unsigned Reg = VA.getLocReg();
-
- if (VT == MVT::i64) {
- // For now assume it is a pointer
- Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
- &AMDGPU::SGPR_64RegClass);
- Reg = MF.addLiveIn(Reg, &AMDGPU::SGPR_64RegClass);
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- InVals.push_back(Copy);
- continue;
- }
-
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
-
- Reg = MF.addLiveIn(Reg, RC);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
-
- if (Arg.VT.isVector()) {
-
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
-
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- Regs.push_back(Copy);
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
- continue;
- }
-
- InVals.push_back(Val);
- }
-
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
+}
- // Start adding system SGPRs.
- if (Info->hasWorkGroupIDX()) {
- unsigned Reg = Info->addWorkGroupIDX();
+// Allocate special input registers that are initialized per-wave.
+static void allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ bool IsShader) {
+ if (Info.hasWorkGroupIDX()) {
+ unsigned Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupIDY()) {
- unsigned Reg = Info->addWorkGroupIDY();
+ if (Info.hasWorkGroupIDY()) {
+ unsigned Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupIDZ()) {
- unsigned Reg = Info->addWorkGroupIDZ();
+ if (Info.hasWorkGroupIDZ()) {
+ unsigned Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupInfo()) {
- unsigned Reg = Info->addWorkGroupInfo();
+ if (Info.hasWorkGroupInfo()) {
+ unsigned Reg = Info.addWorkGroupInfo();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasPrivateSegmentWaveByteOffset()) {
+ if (Info.hasPrivateSegmentWaveByteOffset()) {
// Scratch wave offset passed in system SGPR.
unsigned PrivateSegmentWaveByteOffsetReg;
- if (AMDGPU::isShader(CallConv)) {
+ if (IsShader) {
PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
- Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
} else
- PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset();
+ PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
+}
+static void reservePrivateMemoryRegs(const TargetMachine &TM,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
// Now that we've figured out where the scratch register inputs are, see if
  // we should reserve the arguments and use them directly.
bool HasStackObjects = MF.getFrameInfo().hasStackObjects();
+
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
if (HasStackObjects)
- Info->setHasNonSpillStackObjects(true);
+ Info.setHasNonSpillStackObjects(true);
// Everything live out of a block is spilled with fast regalloc, so it's
// almost certain that spilling will be required.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
// SGPR inputs. We can reserve those and use them directly.
- unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue(
+ unsigned PrivateSegmentBufferReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
- Info->setScratchRSrcReg(PrivateSegmentBufferReg);
+ Info.setScratchRSrcReg(PrivateSegmentBufferReg);
- unsigned PrivateSegmentWaveByteOffsetReg = TRI->getPreloadedValue(
+ unsigned PrivateSegmentWaveByteOffsetReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info->setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
+ Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
} else {
unsigned ReservedBufferReg
- = TRI->reservedPrivateSegmentBufferReg(MF);
+ = TRI.reservedPrivateSegmentBufferReg(MF);
unsigned ReservedOffsetReg
- = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
// We tentatively reserve the last registers (skipping the last two
// which may contain VCC). After register allocation, we'll replace
// these with the ones immediately after those which were really
  // allocated. In the prologue, copies will be inserted from the argument
// to these reserved registers.
- Info->setScratchRSrcReg(ReservedBufferReg);
- Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setScratchRSrcReg(ReservedBufferReg);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
} else {
- unsigned ReservedBufferReg = TRI->reservedPrivateSegmentBufferReg(MF);
+ unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
// Without HSA, relocations are used for the scratch pointer and the
// buffer resource setup is always inserted in the prologue. Scratch wave
// offset is still in an input SGPR.
- Info->setScratchRSrcReg(ReservedBufferReg);
+ Info.setScratchRSrcReg(ReservedBufferReg);
if (HasStackObjects) {
- unsigned ScratchWaveOffsetReg = TRI->getPreloadedValue(
+ unsigned ScratchWaveOffsetReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
} else {
unsigned ReservedOffsetReg
- = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
}
+}
- if (Info->hasWorkItemIDX()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
+ const Function *Fn = MF.getFunction();
+ DiagnosticInfoUnsupported NoGraphicsHSA(
+ *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
+ DAG.getContext()->diagnose(NoGraphicsHSA);
+ return DAG.getEntryNode();
}
- if (Info->hasWorkItemIDY()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+ // Create stack objects that are used for emitting debugger prologue if
+ // "amdgpu-debugger-emit-prologue" attribute was specified.
+ if (ST.debuggerEmitPrologue())
+ createDebuggerPrologueStackObjects(MF);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ SmallVector<CCValAssign, 16> ArgLocs;
+ BitVector Skipped(Ins.size());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ bool IsShader = AMDGPU::isShader(CallConv);
+ bool IsKernel = AMDGPU::isKernel(CallConv);
+ bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
+
+ if (IsShader) {
+ processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
+
+ // At least one interpolation mode must be enabled or else the GPU will
+ // hang.
+ //
+ // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+ // set PSInputAddr, the user wants to enable some bits after the compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+    // will look like, we shouldn't do anything here and the user should take
+ // responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
+ if (CallConv == CallingConv::AMDGPU_PS &&
+ ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11)))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
+ }
+
+ assert(!Info->hasDispatchPtr() &&
+ !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
+ !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+ !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
+ !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+ !Info->hasWorkItemIDZ());
+ } else {
+ assert(!IsKernel || (Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()));
}
- if (Info->hasWorkItemIDZ()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+ if (IsEntryFunc) {
+ allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
+ allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
+ }
+
+ if (IsKernel) {
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+ } else {
+ CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
+ CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
+ }
+
+ SmallVector<SDValue, 16> Chains;
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+ if (Skipped[i]) {
+ InVals.push_back(DAG.getUNDEF(Arg.VT));
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ MVT VT = VA.getLocVT();
+
+ if (IsEntryFunc && VA.isMemLoc()) {
+ VT = Ins[i].VT;
+ EVT MemVT = VA.getLocVT();
+
+ const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(MF) +
+ VA.getLocMemOffset();
+ Info->setABIArgOffset(Offset + MemVT.getStoreSize());
+
+      // The first 36 bytes of the input buffer contain information about
+ // thread group and global sizes.
+ SDValue Arg = lowerKernargMemParameter(
+ DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt(), &Ins[i]);
+ Chains.push_back(Arg.getValue(1));
+
+ auto *ParamTy =
+ dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
+ ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ // On SI local pointers are just offsets into LDS, so they are always
+ // less than 16-bits. On CI and newer they could potentially be
+ // real pointers, so we can't guarantee their size.
+ Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
+ DAG.getValueType(MVT::i16));
+ }
+
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ if (VA.isMemLoc())
+ report_fatal_error("memloc not supported with calling convention");
+
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ if (Arg.VT.isVector()) {
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ Regs.push_back(Copy);
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ Regs.append(NumElements, DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
+ continue;
+ }
+
+ InVals.push_back(Val);
}
- if (Chains.empty())
- return Chain;
+ // Start adding system SGPRs.
+ if (IsEntryFunc)
+ allocateSystemSGPRs(CCInfo, MF, *Info, IsShader);
+
+ reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ return Chains.empty() ? Chain :
+ DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
SDValue
@@ -1197,7 +1398,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Flag.getNode())
RetOps.push_back(Flag);
- unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN;
+ unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN_TO_EPILOG;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
@@ -1470,16 +1671,16 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
if (Offset == 0) {
MachineInstr *SetOn =
- BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
- .addOperand(*Idx)
- .addImm(IdxMode);
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+ .add(*Idx)
+ .addImm(IdxMode);
SetOn->getOperand(3).setIsUndef();
} else {
unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
- .addOperand(*Idx)
- .addImm(Offset);
+ .add(*Idx)
+ .addImm(Offset);
MachineInstr *SetOn =
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
.addReg(Tmp, RegState::Kill)
@@ -1493,10 +1694,10 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
if (Offset == 0) {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addOperand(*Idx);
+ .add(*Idx);
} else {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
- .addOperand(*Idx)
+ .add(*Idx)
.addImm(Offset);
}
@@ -1522,7 +1723,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
std::tie(SubReg, Offset)
= computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
- bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+ bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
MachineBasicBlock::iterator I(&MI);
@@ -1548,7 +1749,6 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
return &MBB;
}
-
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
@@ -1625,7 +1825,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
SrcVec->getReg(),
Offset);
- bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+ bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
if (Idx->getReg() == AMDGPU::NoRegister) {
MachineBasicBlock::iterator I(&MI);
@@ -1634,9 +1834,9 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
assert(Offset == 0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dst)
- .addOperand(*SrcVec)
- .addOperand(*Val)
- .addImm(SubReg);
+ .add(*SrcVec)
+ .add(*Val)
+ .addImm(SubReg);
MI.eraseFromParent();
return &MBB;
@@ -1648,11 +1848,11 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
if (UseGPRIdxMode) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
- .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
- .addOperand(*Val)
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(SrcVec->getReg(), RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
+ .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
+ .add(*Val)
+ .addReg(Dst, RegState::ImplicitDefine)
+ .addReg(SrcVec->getReg(), RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
} else {
@@ -1661,7 +1861,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
BuildMI(MBB, I, DL, MovRelDesc)
.addReg(Dst, RegState::Define)
.addReg(SrcVec->getReg())
- .addOperand(*Val)
+ .add(*Val)
.addImm(SubReg - AMDGPU::sub0);
}
@@ -1694,18 +1894,18 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
if (UseGPRIdxMode) {
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
- .addReg(PhiReg, RegState::Undef, SubReg) // vdst
- .addOperand(*Val) // src0
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(PhiReg, RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
+ .addReg(PhiReg, RegState::Undef, SubReg) // vdst
+ .add(*Val) // src0
+ .addReg(Dst, RegState::ImplicitDefine)
+ .addReg(PhiReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
} else {
const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
.addReg(Dst, RegState::Define)
.addReg(PhiReg)
- .addOperand(*Val)
+ .add(*Val)
.addImm(SubReg - AMDGPU::sub0);
}
@@ -1741,18 +1941,62 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
switch (MI.getOpcode()) {
- case AMDGPU::SI_INIT_M0: {
+ case AMDGPU::S_TRAP_PSEUDO: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ const int TrapType = MI.getOperand(0).getImm();
+
+ if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
+ Subtarget->isTrapHandlerEnabled()) {
+
+ MachineFunction *MF = BB->getParent();
+ SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+ unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+ assert(UserSGPR != AMDGPU::NoRegister);
+
+ if (!BB->isLiveIn(UserSGPR))
+ BB->addLiveIn(UserSGPR);
+
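+      // The HSA trap handler ABI takes the queue pointer in SGPR0_SGPR1, so
+      // copy it there and mark the pair as an implicit use of S_TRAP.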
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
+ .addReg(UserSGPR);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
+ .addImm(TrapType)
+ .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+ } else {
+ switch (TrapType) {
+ case SISubtarget::TrapIDLLVMTrap:
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
+ break;
+ case SISubtarget::TrapIDLLVMDebugTrap: {
+ DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
+ "debugtrap handler not supported",
+ DL,
+ DS_Warning);
+ LLVMContext &C = MF->getFunction()->getContext();
+ C.diagnose(NoTrap);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported trap handler type!");
+ }
+ }
+
+ MI.eraseFromParent();
+ return BB;
+ }
+ case AMDGPU::SI_INIT_M0:
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addOperand(MI.getOperand(0));
+ .add(MI.getOperand(0));
MI.eraseFromParent();
return BB;
- }
+
case AMDGPU::GET_GROUPSTATICSIZE: {
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
- .addOperand(MI.getOperand(0))
- .addImm(MFI->getLDSSize());
+ .add(MI.getOperand(0))
+ .addImm(MFI->getLDSSize());
MI.eraseFromParent();
return BB;
}
@@ -1803,7 +2047,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *Br = BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
- .addOperand(MI.getOperand(0));
+ .add(MI.getOperand(0));
Br->getOperand(1).setIsUndef(true); // read undef SCC
MI.eraseFromParent();
return BB;
@@ -1856,9 +2100,6 @@ MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
- if (!VT.isSimple())
- return false;
-
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
// This is as fast on some subtargets. However, we always have full rate f32
@@ -1909,13 +2150,52 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
- case ISD::TRAP: return lowerTRAP(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
}
return SDValue();
}
+void SITargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ case ISD::INSERT_VECTOR_ELT: {
+ if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG))
+ Results.push_back(Res);
+ return;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ if (SDValue Res = lowerEXTRACT_VECTOR_ELT(SDValue(N, 0), DAG))
+ Results.push_back(Res);
+ return;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IID) {
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ SDLoc SL(N);
+ SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32,
+ Src0, Src1);
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
+ return;
+ }
+ default:
+ break;
+ }
+ }
+ default:
+ break;
+ }
+}
+
/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -1932,31 +2212,25 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
return nullptr;
}
-bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
+unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
- case AMDGPUIntrinsic::amdgcn_if:
- case AMDGPUIntrinsic::amdgcn_else:
- case AMDGPUIntrinsic::amdgcn_end_cf:
- case AMDGPUIntrinsic::amdgcn_loop:
- return true;
+ case Intrinsic::amdgcn_if:
+ return AMDGPUISD::IF;
+ case Intrinsic::amdgcn_else:
+ return AMDGPUISD::ELSE;
+ case Intrinsic::amdgcn_loop:
+ return AMDGPUISD::LOOP;
+ case Intrinsic::amdgcn_end_cf:
+ llvm_unreachable("should not occur");
default:
- return false;
+ return 0;
}
}
- if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
- switch (cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue()) {
- case AMDGPUIntrinsic::amdgcn_break:
- case AMDGPUIntrinsic::amdgcn_if_break:
- case AMDGPUIntrinsic::amdgcn_else_break:
- return true;
- default:
- return false;
- }
- }
-
- return false;
+ // break, if_break, else_break are all only used as inputs to loop, not
+ // directly as branch conditions.
+ return 0;
}
void SITargetLowering::createDebuggerPrologueStackObjects(
@@ -1987,13 +2261,13 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
- return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -2006,7 +2280,6 @@ bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
/// last parameter, also switches branch target with BR if the need arises
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SelectionDAG &DAG) const {
-
SDLoc DL(BRCOND);
SDNode *Intr = BRCOND.getOperand(1).getNode();
@@ -2032,7 +2305,8 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
// eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3
// => t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch<bb1 0x7fee5286d088>
- if (!isCFIntrinsic(Intr)) {
+ unsigned CFNode = isCFIntrinsic(Intr);
+ if (CFNode == 0) {
// This is a uniform branch so we don't need to legalize.
return BRCOND;
}
@@ -2050,15 +2324,13 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
if (HaveChain)
Ops.push_back(BRCOND.getOperand(0));
- Ops.append(Intr->op_begin() + (HaveChain ? 1 : 0), Intr->op_end());
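+  // Skip the chain (if any) and the intrinsic ID operand; the operation is
+  // now encoded directly in the opcode of the target node built below.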
+ Ops.append(Intr->op_begin() + (HaveChain ? 2 : 1), Intr->op_end());
Ops.push_back(Target);
ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
// build the new intrinsic call
- SDNode *Result = DAG.getNode(
- Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
- DAG.getVTList(Res), Ops).getNode();
+ SDNode *Result = DAG.getNode(CFNode, DL, DAG.getVTList(Res), Ops).getNode();
if (!HaveChain) {
SDValue Ops[] = {
@@ -2130,9 +2402,28 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
-SDValue SITargetLowering::getSegmentAperture(unsigned AS,
+SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
- SDLoc SL;
+ // FIXME: Use inline constants (src_{shared, private}_base) instead.
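+  //
+  // A sketch of the aperture-register path below: the aperture base is held in
+  // the MEM_BASES hardware register as a (WidthM1 + 1)-bit field starting at
+  // bit Offset. The S_GETREG_B32 immediate packs
+  //   ID_MEM_BASES << ID_SHIFT_ | Offset << OFFSET_SHIFT_ |
+  //   WidthM1 << WIDTH_M1_SHIFT_
+  // and shifting the extracted field left by (WidthM1 + 1) gives the 32-bit
+  // segment aperture base.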
+ if (Subtarget->hasApertureRegs()) {
+ unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
+ unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
+ unsigned Encoding =
+ AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
+ Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
+ WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
+
+ SDValue EncodingImm = DAG.getTargetConstant(Encoding, DL, MVT::i16);
+ SDValue ApertureReg = SDValue(
+ DAG.getMachineNode(AMDGPU::S_GETREG_B32, DL, MVT::i32, EncodingImm), 0);
+ SDValue ShiftAmount = DAG.getTargetConstant(WidthM1 + 1, DL, MVT::i32);
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
+ }
+
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
@@ -2143,19 +2434,19 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
- uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+ uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr,
- DAG.getConstant(StructOffset, SL, MVT::i64));
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, QueuePtr,
+ DAG.getConstant(StructOffset, DL, MVT::i64));
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
- AMDGPUAS::CONSTANT_ADDRESS));
+ AMDGPUASI.CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
- return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
+ return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
MinAlign(64, StructOffset),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
@@ -2167,15 +2458,19 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
SDValue Src = ASC->getOperand(0);
-
- // FIXME: Really support non-0 null pointers.
- SDValue SegmentNullPtr = DAG.getConstant(-1, SL, MVT::i32);
SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64);
+ const AMDGPUTargetMachine &TM =
+ static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
+
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
- if (ASC->getDestAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- ASC->getDestAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ unsigned DestAS = ASC->getDestAddressSpace();
+
+ if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
+ DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ unsigned NullVal = TM.getNullPointerValue(DestAS);
+ SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
@@ -2185,13 +2480,18 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
}
// local/private -> flat
- if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
- if (ASC->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- ASC->getSrcAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ unsigned SrcAS = ASC->getSrcAddressSpace();
+
+ if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
+ SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ unsigned NullVal = TM.getNullPointerValue(SrcAS);
+ SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
+
SDValue NonNull
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
- SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), DAG);
+ SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
SDValue CvtPtr
= DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
@@ -2211,17 +2511,88 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
return DAG.getUNDEF(ASC->getValueType(0));
}
+SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Idx = Op.getOperand(2);
+ if (isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ // Avoid stack access for dynamic indexing.
+ SDLoc SL(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Op.getOperand(1));
+
+ // v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
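+  // i.e. build a 16-bit mask at the selected element's bit position and merge
+  // the new element into the old vector under that mask.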
+ SDValue ExtVal = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Val);
+
+ // Convert vector index to bit-index.
+ SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx,
+ DAG.getConstant(16, SL, MVT::i32));
+
+ SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
+
+ SDValue BFM = DAG.getNode(ISD::SHL, SL, MVT::i32,
+ DAG.getConstant(0xffff, SL, MVT::i32),
+ ScaledIdx);
+
+ SDValue LHS = DAG.getNode(ISD::AND, SL, MVT::i32, BFM, ExtVal);
+ SDValue RHS = DAG.getNode(ISD::AND, SL, MVT::i32,
+ DAG.getNOT(SL, BFM, MVT::i32), BCVec);
+
+ SDValue BFI = DAG.getNode(ISD::OR, SL, MVT::i32, LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, SL, Op.getValueType(), BFI);
+}
+
+SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+
+ EVT ResultVT = Op.getValueType();
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+
+ if (const ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
+
+ if (CIdx->getZExtValue() == 1) {
+ Result = DAG.getNode(ISD::SRL, SL, MVT::i32, Result,
+ DAG.getConstant(16, SL, MVT::i32));
+ } else {
+ assert(CIdx->getZExtValue() == 0);
+ }
+
+ if (ResultVT.bitsLT(MVT::i32))
+ Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Result);
+ return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
+ }
+
+ SDValue Sixteen = DAG.getConstant(16, SL, MVT::i32);
+
+ // Convert vector index to bit-index.
+ SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, Sixteen);
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
+ SDValue Elt = DAG.getNode(ISD::SRL, SL, MVT::i32, BC, ScaledIdx);
+
+ SDValue Result = Elt;
+ if (ResultVT.bitsLT(MVT::i32))
+ Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Result);
+
+ return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
+}
+
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
-static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
- SDLoc DL, unsigned Offset, EVT PtrVT,
- unsigned GAFlags = SIInstrInfo::MO_NONE) {
+static SDValue
+buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
+ const SDLoc &DL, unsigned Offset, EVT PtrVT,
+ unsigned GAFlags = SIInstrInfo::MO_NONE) {
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
//
@@ -2265,8 +2636,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
+ GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
SDLoc DL(GSD);
@@ -2283,7 +2654,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
// FIXME: Use a PseudoSourceValue once those can be assigned an address space.
@@ -2294,23 +2665,6 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
MachineMemOperand::MOInvariant);
}
-SDValue SITargetLowering::lowerTRAP(SDValue Op,
- SelectionDAG &DAG) const {
- const MachineFunction &MF = DAG.getMachineFunction();
- DiagnosticInfoUnsupported NoTrap(*MF.getFunction(),
- "trap handler not supported",
- Op.getDebugLoc(),
- DS_Warning);
- DAG.getContext()->diagnose(NoTrap);
-
- // Emit s_endpgm.
-
- // FIXME: This should really be selected to s_trap, but that requires
- // setting up the trap handler for it o do anything.
- return DAG.getNode(AMDGPUISD::ENDPGM, SDLoc(Op), MVT::Other,
- Op.getOperand(0));
-}
-
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
@@ -2332,14 +2686,15 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
MVT VT,
unsigned Offset) const {
SDLoc SL(Op);
- SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL,
- DAG.getEntryNode(), Offset, false);
+ SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL,
+ DAG.getEntryNode(), Offset, false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
}
-static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
+static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
+ EVT VT) {
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
"non-hsa intrinsic with hsa target",
DL.getDebugLoc());
@@ -2347,7 +2702,8 @@ static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
return DAG.getUNDEF(VT);
}
-static SDValue emitRemovedIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
+static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
+ EVT VT) {
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
"intrinsic not supported on subtarget",
DL.getDebugLoc());
@@ -2389,7 +2745,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_implicitarg_ptr: {
unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
- return LowerParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
@@ -2403,19 +2759,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq:
- case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
- case Intrinsic::amdgcn_rsq_legacy: {
+ case Intrinsic::amdgcn_rsq_legacy:
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
- }
- case Intrinsic::amdgcn_rcp_legacy: {
+ case Intrinsic::amdgcn_rcp_legacy:
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
- }
case Intrinsic::amdgcn_rsq_clamp: {
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
@@ -2434,38 +2787,38 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_X, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_X, false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Y, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Y, false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Z, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Z, false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
@@ -2522,43 +2875,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, VT, MMO);
}
- case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
+ case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
- }
- case AMDGPUIntrinsic::SI_vs_load_input:
- return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::SI_fs_constant: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
- SDValue Glue = M0.getValue(1);
- return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
- DAG.getConstant(2, DL, MVT::i32), // P0
- Op.getOperand(1), Op.getOperand(2), Glue);
- }
- case AMDGPUIntrinsic::SI_packf16:
- if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())
- return DAG.getUNDEF(MVT::i32);
- return Op;
- case AMDGPUIntrinsic::SI_fs_interp: {
- SDValue IJ = Op.getOperand(4);
- SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
- DAG.getConstant(1, DL, MVT::i32));
- I = DAG.getNode(ISD::BITCAST, DL, MVT::f32, I);
- J = DAG.getNode(ISD::BITCAST, DL, MVT::f32, J);
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
- SDValue Glue = M0.getValue(1);
- SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
- DAG.getVTList(MVT::f32, MVT::Glue),
- I, Op.getOperand(1), Op.getOperand(2), Glue);
- Glue = SDValue(P1.getNode(), 1);
- return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
- Op.getOperand(1), Op.getOperand(2), Glue);
- }
case Intrinsic::amdgcn_interp_mov: {
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
SDValue Glue = M0.getValue(1);
@@ -2639,10 +2957,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_icmp: {
const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- int CondCode = CD->getSExtValue();
+ if (!CD)
+ return DAG.getUNDEF(VT);
+ int CondCode = CD->getSExtValue();
if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
- CondCode >= ICmpInst::Predicate::BAD_ICMP_PREDICATE)
+ CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
return DAG.getUNDEF(VT);
ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
@@ -2652,10 +2972,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_fcmp: {
const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- int CondCode = CD->getSExtValue();
+ if (!CD)
+ return DAG.getUNDEF(VT);
- if (CondCode <= FCmpInst::Predicate::FCMP_FALSE ||
- CondCode >= FCmpInst::Predicate::FCMP_TRUE)
+ int CondCode = CD->getSExtValue();
+ if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
+ CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE)
return DAG.getUNDEF(VT);
FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
@@ -2663,14 +2985,29 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
Op.getOperand(2), DAG.getCondCode(CCOpcode));
}
+ case Intrinsic::amdgcn_fmed3:
+ return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_sffbh:
- case AMDGPUIntrinsic::AMDGPU_flbit_i32: // Legacy name.
return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1));
+ case Intrinsic::amdgcn_sbfe:
+ return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_ubfe:
+ return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ // FIXME: Stop adding cast if v2f16 legal.
+ EVT VT = Op.getValueType();
+ SDValue Node = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, DL, MVT::i32,
+ Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Node);
+ }
default:
- return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ return Op;
}
}
@@ -2718,6 +3055,64 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
}
+ // Basic sample.
+ case Intrinsic::amdgcn_image_sample:
+ case Intrinsic::amdgcn_image_sample_cl:
+ case Intrinsic::amdgcn_image_sample_d:
+ case Intrinsic::amdgcn_image_sample_d_cl:
+ case Intrinsic::amdgcn_image_sample_l:
+ case Intrinsic::amdgcn_image_sample_b:
+ case Intrinsic::amdgcn_image_sample_b_cl:
+ case Intrinsic::amdgcn_image_sample_lz:
+ case Intrinsic::amdgcn_image_sample_cd:
+ case Intrinsic::amdgcn_image_sample_cd_cl:
+
+ // Sample with comparison.
+ case Intrinsic::amdgcn_image_sample_c:
+ case Intrinsic::amdgcn_image_sample_c_cl:
+ case Intrinsic::amdgcn_image_sample_c_d:
+ case Intrinsic::amdgcn_image_sample_c_d_cl:
+ case Intrinsic::amdgcn_image_sample_c_l:
+ case Intrinsic::amdgcn_image_sample_c_b:
+ case Intrinsic::amdgcn_image_sample_c_b_cl:
+ case Intrinsic::amdgcn_image_sample_c_lz:
+ case Intrinsic::amdgcn_image_sample_c_cd:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+ // Sample with offsets.
+ case Intrinsic::amdgcn_image_sample_o:
+ case Intrinsic::amdgcn_image_sample_cl_o:
+ case Intrinsic::amdgcn_image_sample_d_o:
+ case Intrinsic::amdgcn_image_sample_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_l_o:
+ case Intrinsic::amdgcn_image_sample_b_o:
+ case Intrinsic::amdgcn_image_sample_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_lz_o:
+ case Intrinsic::amdgcn_image_sample_cd_o:
+ case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+ // Sample with comparison and offsets.
+ case Intrinsic::amdgcn_image_sample_c_o:
+ case Intrinsic::amdgcn_image_sample_c_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_d_o:
+ case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_l_o:
+ case Intrinsic::amdgcn_image_sample_c_b_o:
+ case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_lz_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+ case Intrinsic::amdgcn_image_getlod: {
+    // If the dmask has every channel disabled, replace the result with undef.
+ const ConstantSDNode *DMask = dyn_cast<ConstantSDNode>(Op.getOperand(5));
+ if (!DMask || DMask->isNullValue()) {
+ SDValue Undef = DAG.getUNDEF(Op.getValueType());
+ return DAG.getMergeValues({ Undef, Op.getOperand(0) }, SDLoc(Op));
+ }
+
+ return SDValue();
+ }
default:
return SDValue();
}
@@ -2731,17 +3126,60 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_sendmsg:
- case Intrinsic::amdgcn_s_sendmsg: {
- Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
- SDValue Glue = Chain.getValue(1);
- return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
- Op.getOperand(2), Glue);
+ case Intrinsic::amdgcn_exp: {
+ const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
+ const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(8));
+ const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(9));
+
+ const SDValue Ops[] = {
+ Chain,
+ DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
+ DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
+ Op.getOperand(4), // src0
+ Op.getOperand(5), // src1
+ Op.getOperand(6), // src2
+ Op.getOperand(7), // src3
+ DAG.getTargetConstant(0, DL, MVT::i1), // compr
+ DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
+ };
+
+ unsigned Opc = Done->isNullValue() ?
+ AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
+ return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
+ }
+ case Intrinsic::amdgcn_exp_compr: {
+ const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
+ const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
+ SDValue Src0 = Op.getOperand(4);
+ SDValue Src1 = Op.getOperand(5);
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
+ const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(7));
+
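+    // A compressed export packs two 16-bit channels into each 32-bit source,
+    // so only src0/src1 carry data here; src2/src3 are left undef.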
+ SDValue Undef = DAG.getUNDEF(MVT::f32);
+ const SDValue Ops[] = {
+ Chain,
+ DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
+ DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0),
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1),
+ Undef, // src2
+ Undef, // src3
+ DAG.getTargetConstant(1, DL, MVT::i1), // compr
+ DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
+ };
+
+ unsigned Opc = Done->isNullValue() ?
+ AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
+ return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
}
+ case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
+ unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ?
+ AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT;
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
- return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
+ return DAG.getNode(NodeOp, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
case AMDGPUIntrinsic::SI_tbuffer_store: {
@@ -2784,31 +3222,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
}
- case AMDGPUIntrinsic::SI_export: {
- const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(2));
- const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(3));
- const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(4));
- const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(5));
- const ConstantSDNode *Compr = cast<ConstantSDNode>(Op.getOperand(6));
-
- const SDValue Ops[] = {
- Chain,
- DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8),
- DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1),
- DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8),
- DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1),
- Op.getOperand(7), // src0
- Op.getOperand(8), // src1
- Op.getOperand(9), // src2
- Op.getOperand(10) // src3
- };
-
- unsigned Opc = Done->isNullValue() ?
- AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
- return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
- }
- default:
+ case Intrinsic::amdgcn_s_barrier: {
+ if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
+ if (WGSize <= ST.getWavefrontSize())
+ return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
+ Op.getOperand(0)), 0);
+ }
return SDValue();
+ };
+ default:
+ return Op;
}
}
@@ -2857,21 +3283,20 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+ AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
- switch (AS) {
- case AMDGPUAS::CONSTANT_ADDRESS:
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
if (isMemOpUniform(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
- LLVM_FALLTHROUGH;
- case AMDGPUAS::GLOBAL_ADDRESS: {
+ }
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
@@ -2880,13 +3305,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// loads.
//
}
- LLVM_FALLTHROUGH;
- case AMDGPUAS::FLAT_ADDRESS:
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v4 loads are supported for private and global memory.
return SDValue();
- case AMDGPUAS::PRIVATE_ADDRESS: {
+ }
+ if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
@@ -2905,8 +3331,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- }
- case AMDGPUAS::LOCAL_ADDRESS: {
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
@@ -2916,9 +3341,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// If properly aligned, if we split we might be able to use ds_read_b64.
return SplitVectorLoad(Op, DAG);
}
- default:
- return SDValue();
- }
+ return SDValue();
}
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -3287,18 +3710,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+ AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::FLAT_ADDRESS:
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
return SDValue();
- case AMDGPUAS::PRIVATE_ADDRESS: {
+ } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
@@ -3313,8 +3735,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- }
- case AMDGPUAS::LOCAL_ADDRESS: {
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
@@ -3323,8 +3744,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// If properly aligned, if we split we might be able to use ds_write_b64.
return SplitVectorStore(Op, DAG);
- }
- default:
+ } else {
llvm_unreachable("unhandled address space");
}
}
@@ -3355,7 +3775,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS))
+ if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
return Op;
// Non-local address space requires custom lowering for atomic compare
@@ -3412,12 +3832,12 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
/// the immediate offsets of a memory instruction for the given address space.
static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
const SISubtarget &STI) {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
+ auto AMDGPUASI = STI.getAMDGPUAS();
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
    // MUBUF instructions have a 12-bit offset in bytes.
return isUInt<12>(OffsetSize);
}
- case AMDGPUAS::CONSTANT_ADDRESS: {
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// SMRD instructions have an 8-bit offset in dwords on SI and
// a 20-bit offset in bytes on VI.
if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
@@ -3425,16 +3845,13 @@ static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
else
return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
}
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
+ if (AS == AMDGPUASI.LOCAL_ADDRESS ||
+ AS == AMDGPUASI.REGION_ADDRESS) {
// The single offset versions have a 16-bit offset in bytes.
return isUInt<16>(OffsetSize);
}
- case AMDGPUAS::PRIVATE_ADDRESS:
// Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
+ return false;
}
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
@@ -3492,7 +3909,7 @@ SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
// TODO: We could also do this for multiplies.
unsigned AS = N->getAddressSpace();
- if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
if (NewPtr) {
SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
@@ -3692,6 +4109,88 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
return SDValue();
}
+// Instructions that will be lowered with a final instruction that zeros the
+// high result bits.
+// XXX - probably only need to list legal operations.
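+// For example, on VI a 16-bit VALU op such as V_ADD_F16 writes bits 15:0 of
+// its 32-bit destination VGPR and zeroes bits 31:16, so a zext of the bitcast
+// result needs no extra masking (see performZeroExtendCombine below).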
+static bool fp16SrcZerosHighBits(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FMA:
+ case ISD::FMAD:
+ case ISD::FCANONICALIZE:
+ case ISD::FP_ROUND:
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::FABS:
+ // Fabs is lowered to a bit operation, but it's an and which will clear the
+ // high bits anyway.
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FFLOOR:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case AMDGPUISD::FRACT:
+ case AMDGPUISD::CLAMP:
+ case AMDGPUISD::COS_HW:
+ case AMDGPUISD::SIN_HW:
+ case AMDGPUISD::FMIN3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMAD_FTZ:
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::LDEXP:
+ return true;
+ default:
+ // fcopysign, select and others may be lowered to 32-bit bit operations
+ // which don't zero the high bits.
+ return false;
+ }
+}
+
+SDValue SITargetLowering::performZeroExtendCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (!Subtarget->has16BitInsts() ||
+ DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() != MVT::i16)
+ return SDValue();
+
+ // (i32 zext (i16 (bitcast f16:$src))) -> fp16_zext $src
+ // FIXME: It is not universally true that the high bits are zeroed on gfx9.
+ if (Src.getOpcode() == ISD::BITCAST) {
+ SDValue BCSrc = Src.getOperand(0);
+ if (BCSrc.getValueType() == MVT::f16 &&
+ fp16SrcZerosHighBits(BCSrc.getOpcode()))
+ return DCI.DAG.getNode(AMDGPUISD::FP16_ZEXT, SDLoc(N), VT, BCSrc);
+ }
+
+ return SDValue();
+}
+
SDValue SITargetLowering::performClassCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -3713,7 +4212,7 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+ ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
if (!CFP)
return SDValue();
@@ -3723,13 +4222,14 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
// Flush denormals to 0 if not enabled.
if (C.isDenormal()) {
EVT VT = N->getValueType(0);
- if (VT == MVT::f32 && !Subtarget->hasFP32Denormals())
+ EVT SVT = VT.getScalarType();
+ if (SVT == MVT::f32 && !Subtarget->hasFP32Denormals())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
- if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
+ if (SVT == MVT::f64 && !Subtarget->hasFP64Denormals())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
- if (VT == MVT::f16 && !Subtarget->hasFP16Denormals())
+ if (SVT == MVT::f16 && !Subtarget->hasFP16Denormals())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
}
@@ -3749,7 +4249,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return SDValue(CFP, 0);
+ return N->getOperand(0);
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
@@ -3771,8 +4271,9 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
}
}
-static SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1, bool Signed) {
+SDValue SITargetLowering::performIntMed3ImmCombine(
+ SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Op0, SDValue Op1, bool Signed) const {
ConstantSDNode *K1 = dyn_cast<ConstantSDNode>(Op1);
if (!K1)
return SDValue();
@@ -3790,23 +4291,22 @@ static SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
}
EVT VT = K0->getValueType(0);
+ unsigned Med3Opc = Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3;
+ if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->hasMed3_16())) {
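+    // e.g. (smin (smax x, 0), 10) -> (smed3 x, 0, 10), clamping x to [0, 10]
+    // with a single med3 instruction.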
+ return DAG.getNode(Med3Opc, SL, VT,
+ Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
+ }
+ // If there isn't a 16-bit med3 operation, convert to 32-bit.
MVT NVT = MVT::i32;
unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- SDValue Tmp1, Tmp2, Tmp3;
- Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
- Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
- Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
-
- if (VT == MVT::i16) {
- Tmp1 = DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, NVT,
- Tmp1, Tmp2, Tmp3);
+ SDValue Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
+ SDValue Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
+ SDValue Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
- return DAG.getNode(ISD::TRUNCATE, SL, VT, Tmp1);
- } else
- return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
- Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
+ SDValue Med3 = DAG.getNode(Med3Opc, SL, NVT, Tmp1, Tmp2, Tmp3);
+ return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
}
static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
@@ -3816,8 +4316,10 @@ static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
return DAG.isKnownNeverNaN(Op);
}
-static SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1) {
+SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
+ const SDLoc &SL,
+ SDValue Op0,
+ SDValue Op1) const {
ConstantFPSDNode *K1 = dyn_cast<ConstantFPSDNode>(Op1);
if (!K1)
return SDValue();
@@ -3831,6 +4333,20 @@ static SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
if (Cmp == APFloat::cmpGreaterThan)
return SDValue();
+ // TODO: Check IEEE bit enabled?
+ EVT VT = K0->getValueType(0);
+ if (Subtarget->enableDX10Clamp()) {
+ // If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
+ // hardware fmed3 behavior converting to a min.
+ // FIXME: Should this be allowing -0.0?
+ if (K1->isExactlyValue(1.0) && K0->isExactlyValue(0.0))
+ return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Op0.getOperand(0));
+ }
+
+ // med3 for f16 is only available on gfx9+.
+ if (VT == MVT::f64 || (VT == MVT::f16 && !Subtarget->hasMed3_16()))
+ return SDValue();
+
// This isn't safe with signaling NaNs because in IEEE mode, min/max on a
// signaling NaN gives a quiet NaN. The quiet NaN input to the min would then
// give the other result, which is different from med3 with a NaN input.
@@ -3846,6 +4362,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -3853,7 +4370,9 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
  // Only do this if the inner op has one use since this will just increase
// register pressure for no benefit.
- if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY) {
+
+ if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
+ VT != MVT::f64) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
@@ -3895,7 +4414,9 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
- N->getValueType(0) == MVT::f32 && Op0.hasOneUse()) {
+ (VT == MVT::f32 || VT == MVT::f64 ||
+ (VT == MVT::f16 && Subtarget->has16BitInsts())) &&
+ Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
return Res;
}
@@ -3903,6 +4424,69 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
return SDValue();
}
+static bool isClampZeroToOne(SDValue A, SDValue B) {
+ if (ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) {
+ if (ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) {
+ // FIXME: Should this be allowing -0.0?
+ return (CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
+ (CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
+ }
+ }
+
+ return false;
+}
+
+// FIXME: Should only worry about snans for version with chain.
+SDValue SITargetLowering::performFMed3Combine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+ // v_med3_f32 and v_max_f32 behave identically wrt denorms, exceptions and
+ // NaNs. With a NaN input, the order of the operands may change the result.
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ SDValue Src2 = N->getOperand(2);
+
+ if (isClampZeroToOne(Src0, Src1)) {
+ // const_a, const_b, x -> clamp is safe in all cases including signaling
+ // nans.
+ // FIXME: Should this be allowing -0.0?
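+    // e.g. (fmed3 0.0, 1.0, x) and (fmed3 1.0, 0.0, x) both fold to (clamp x).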
+ return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
+ }
+
+ // FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
+ // handling no dx10-clamp?
+ if (Subtarget->enableDX10Clamp()) {
+    // If NaNs are clamped to 0, we are free to reorder the inputs.
+
+ if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
+ std::swap(Src0, Src1);
+
+ if (isa<ConstantFPSDNode>(Src1) && !isa<ConstantFPSDNode>(Src2))
+ std::swap(Src1, Src2);
+
+ if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
+ std::swap(Src0, Src1);
+
+ if (isClampZeroToOne(Src1, Src2))
+ return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src0);
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ if (Src0.isUndef() && Src1.isUndef())
+ return DCI.DAG.getUNDEF(N->getValueType(0));
+ return SDValue();
+}
+
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0,
const SDNode *N1) const {
@@ -3933,7 +4517,6 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- assert(!VT.isVector());
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
@@ -4112,7 +4695,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
- N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMinMaxCombine(N, DCI);
break;
@@ -4135,17 +4717,18 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case AMDGPUISD::ATOMIC_INC:
- case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
+ case AMDGPUISD::ATOMIC_DEC: // TODO: Target mem intrinsics.
if (DCI.isBeforeLegalize())
break;
return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
- }
case ISD::AND:
return performAndCombine(N, DCI);
case ISD::OR:
return performOrCombine(N, DCI);
case ISD::XOR:
return performXorCombine(N, DCI);
+ case ISD::ZERO_EXTEND:
+ return performZeroExtendCombine(N, DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
@@ -4170,6 +4753,28 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3:
return performCvtF32UByteNCombine(N, DCI);
+ case AMDGPUISD::FMED3:
+ return performFMed3Combine(N, DCI);
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ return performCvtPkRTZCombine(N, DCI);
+ case ISD::SCALAR_TO_VECTOR: {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ // v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
+ if (VT == MVT::v2i16 || VT == MVT::v2f16) {
+ SDLoc SL(N);
+ SDValue Src = N->getOperand(0);
+ EVT EltVT = Src.getValueType();
+ if (EltVT == MVT::f16)
+ Src = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Src);
+
+ SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Src);
+ return DAG.getNode(ISD::BITCAST, SL, VT, Ext);
+ }
+
+ break;
+ }
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
@@ -4198,6 +4803,10 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
I != E; ++I) {
+ // Don't look at users of the chain.
+ if (I.getUse().getResNo() != 0)
+ continue;
+
// Abort if we can't understand the usage
if (!I->isMachineOpcode() ||
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
@@ -4250,7 +4859,6 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Update the users of the node with the new indices
for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
-
SDNode *User = Users[i];
if (!User)
continue;
@@ -4277,8 +4885,33 @@ static bool isFrameIndexOp(SDValue Op) {
/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs to these instructions are registers.
-void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
- SelectionDAG &DAG) const {
+SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
+ SelectionDAG &DAG) const {
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ RegisterSDNode *DestReg = cast<RegisterSDNode>(Node->getOperand(1));
+ SDValue SrcVal = Node->getOperand(2);
+
+ // Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have
+ // to try understanding copies to physical registers.
+ if (SrcVal.getValueType() == MVT::i1 &&
+ TargetRegisterInfo::isPhysicalRegister(DestReg->getReg())) {
+ SDLoc SL(Node);
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ SDValue VReg = DAG.getRegister(
+ MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1);
+
+ SDNode *Glued = Node->getGluedNode();
+ SDValue ToVReg
+ = DAG.getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal,
+ SDValue(Glued, Glued ? Glued->getNumValues() - 1 : 0));
+ SDValue ToResultReg
+ = DAG.getCopyToReg(ToVReg, SL, SDValue(DestReg, 0),
+ VReg, ToVReg.getValue(1));
+ DAG.ReplaceAllUsesWith(Node, ToResultReg.getNode());
+ DAG.RemoveDeadNode(Node);
+ return ToResultReg.getNode();
+ }
+ }
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
@@ -4294,6 +4927,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
}
DAG.UpdateNodeOperands(Node, Ops);
+ return Node;
}
/// \brief Fold the instructions after selecting them.
@@ -4496,6 +5130,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
case 256:
return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+ case 512:
+ return std::make_pair(0U, &AMDGPU::SReg_512RegClass);
}
case 'v':
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 6c04e4f30977..d177777ad5ee 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -21,11 +21,13 @@
namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
- SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
- unsigned Offset) const;
- SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
- SDValue Chain, unsigned Offset, bool Signed,
- const ISD::InputArg *Arg = nullptr) const;
+ SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Chain, uint64_t Offset) const;
+ SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Chain,
+ uint64_t Offset, bool Signed,
+ const ISD::InputArg *Arg = nullptr) const;
+
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
@@ -55,11 +57,19 @@ class SITargetLowering final : public AMDGPUTargetLowering {
const SDLoc &DL,
EVT VT) const;
+ SDValue convertArgType(
+ SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
+ bool Signed, const ISD::InputArg *Arg = nullptr) const;
+
/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
- SDValue getSegmentAperture(unsigned AS, SelectionDAG &DAG) const;
+ SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
@@ -79,10 +89,17 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Op0, SDValue Op1) const;
+ SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Op0, SDValue Op1, bool Signed) const;
SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
@@ -94,7 +111,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
- bool isCFIntrinsic(const SDNode *Intr) const;
+ unsigned isCFIntrinsic(const SDNode *Intr) const;
void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
@@ -115,11 +132,15 @@ public:
const SISubtarget *getSubtarget() const;
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+ EVT /*VT*/) const override;
+
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
unsigned IntrinsicID) const override;
- bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
- EVT /*VT*/) const override;
+ bool getAddrModeArguments(IntrinsicInst * /*I*/,
+ SmallVectorImpl<Value*> &/*Ops*/,
+ Type *&/*AccessTy*/) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
@@ -175,6 +196,9 @@ public:
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
void AdjustInstrPostInstrSelection(MachineInstr &MI,
@@ -182,7 +206,7 @@ public:
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const override;
- void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
+ SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL,
SDValue Ptr) const;
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index 91e4bf755c53..ba346d2fad02 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,4 +1,4 @@
-//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===//
+//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,33 +12,46 @@
/// branches when it's expected that jumping over the untaken control flow will
/// be cheaper than having every workitem no-op through it.
//
+//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
-namespace {
-
static cl::opt<unsigned> SkipThresholdFlag(
"amdgpu-skip-threshold",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
+namespace {
+
class SIInsertSkips : public MachineFunctionPass {
private:
- const SIRegisterInfo *TRI;
- const SIInstrInfo *TII;
- unsigned SkipThreshold;
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ unsigned SkipThreshold = 0;
bool shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
@@ -55,8 +68,7 @@ private:
public:
static char ID;
- SIInsertSkips() :
- MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { }
+ SIInsertSkips() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -69,7 +81,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
char SIInsertSkips::ID = 0;
@@ -195,8 +207,8 @@ void SIInsertSkips::kill(MachineInstr &MI) {
}
} else {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
- .addImm(0)
- .addOperand(Op);
+ .addImm(0)
+ .add(Op);
}
}
@@ -251,6 +263,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
MachineBasicBlock &MBB = *BI;
+ bool HaveSkipBlock = false;
if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
// Reached convergence point for last divergent branch.
@@ -270,27 +283,33 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH: {
+ case AMDGPU::SI_MASK_BRANCH:
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
break;
- }
- case AMDGPU::S_BRANCH: {
+
+ case AMDGPU::S_BRANCH:
// Optimize out branches to the next block.
// FIXME: Shouldn't this be handled by BranchFolding?
- if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
+ if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
+ MI.eraseFromParent();
+ } else if (HaveSkipBlock) {
+        // Remove the given unconditional branch when a skip block has been
+        // inserted after the current one, so that the two instructions
+        // performing the kill are skipped when the exec mask is non-zero.
MI.eraseFromParent();
+ }
break;
- }
- case AMDGPU::SI_KILL_TERMINATOR: {
+
+ case AMDGPU::SI_KILL_TERMINATOR:
MadeChange = true;
kill(MI);
if (ExecBranchStack.empty()) {
if (skipIfDead(MI, *NextBB)) {
+ HaveSkipBlock = true;
NextBB = std::next(BI);
BE = MF.end();
- Next = MBB.end();
}
} else {
HaveKill = true;
@@ -298,15 +317,15 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
break;
- }
- case AMDGPU::SI_RETURN: {
+
+ case AMDGPU::SI_RETURN_TO_EPILOG:
// FIXME: Should move somewhere else
assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
// Graphics shaders returning non-void shouldn't contain S_ENDPGM,
// because external bytecode will be appended at the end.
if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
- // SI_RETURN is not the last instruction. Add an empty block at
+ // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
// the end and jump there.
if (!EmptyMBBAtEnd) {
EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
@@ -318,7 +337,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
.addMBB(EmptyMBBAtEnd);
I->eraseFromParent();
}
- }
+ break;
+
default:
break;
}
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
new file mode 100644
index 000000000000..c2a3e62aa827
--- /dev/null
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -0,0 +1,1863 @@
+//===-- SIInsertWaitcnts.cpp - Insert Wait Instructions ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "si-insert-waitcnts"
+
+using namespace llvm;
+
+namespace {
+
+// Class of object that encapsulates the latest instruction counter score
+// associated with the operand. Used for determining whether an
+// s_waitcnt instruction needs to be emitted.
+
+#define CNT_MASK(t) (1u << (t))
+
+enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
+
+typedef std::pair<signed, signed> RegInterval;
+
+struct {
+ int32_t VmcntMax;
+ int32_t ExpcntMax;
+ int32_t LgkmcntMax;
+ int32_t NumVGPRsMax;
+ int32_t NumSGPRsMax;
+} HardwareLimits;
+
+struct {
+ unsigned VGPR0;
+ unsigned VGPRL;
+ unsigned SGPR0;
+ unsigned SGPRL;
+} RegisterEncoding;
+
+enum WaitEventType {
+ VMEM_ACCESS, // vector-memory read & write
+ LDS_ACCESS, // lds read & write
+ GDS_ACCESS, // gds read & write
+ SQ_MESSAGE, // send message
+ SMEM_ACCESS, // scalar-memory read & write
+ EXP_GPR_LOCK, // export holding on its data src
+ GDS_GPR_LOCK, // GDS holding on its data and addr src
+ EXP_POS_ACCESS, // write to export position
+ EXP_PARAM_ACCESS, // write to export parameter
+ VMW_GPR_LOCK, // vector-memory write holding on its data src
+ NUM_WAIT_EVENTS,
+};
+
+// The mapping is:
+// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
+// SQ_MAX_PGM_VGPRS .. NUM_ALL_VGPRS-1 extra VGPR-like slots
+// NUM_ALL_VGPRS .. NUM_ALL_VGPRS+SQ_MAX_PGM_SGPRS-1 real SGPRs
+// We reserve a fixed number of VGPR slots in the scoring tables for
+// special tokens like SCMEM_LDS (needed for buffer load to LDS).
+enum RegisterMapping {
+ SQ_MAX_PGM_VGPRS = 256, // Maximum programmable VGPRs across all targets.
+ SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
+ NUM_EXTRA_VGPRS = 1, // A reserved slot for DS.
+ EXTRA_VGPR_LDS = 0, // This is a placeholder the Shader algorithm uses.
+ NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
+};
+
+#define ForAllWaitEventType(w) \
+ for (enum WaitEventType w = (enum WaitEventType)0; \
+ (w) < (enum WaitEventType)NUM_WAIT_EVENTS; \
+ (w) = (enum WaitEventType)((w) + 1))
+
+// This is a per-basic-block object that maintains the current score bracket
+// of each wait-counter, and a per-register scoreboard for each wait-counter.
+// We also maintain the latest score for every event type that can change the
+// waitcnt, in order to know whether there are multiple event types within
+// the brackets. When multiple event types occur within a bracket, the
+// wait-count may be decremented out of order, so we need to put in an
+// "s_waitcnt 0" before use.
+class BlockWaitcntBrackets {
+public:
+ static int32_t getWaitCountMax(InstCounterType T) {
+ switch (T) {
+ case VM_CNT:
+ return HardwareLimits.VmcntMax;
+ case LGKM_CNT:
+ return HardwareLimits.LgkmcntMax;
+ case EXP_CNT:
+ return HardwareLimits.ExpcntMax;
+ default:
+ break;
+ }
+ return 0;
+ };
+
+ void setScoreLB(InstCounterType T, int32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreLBs[T] = Val;
+ };
+
+ void setScoreUB(InstCounterType T, int32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreUBs[T] = Val;
+ if (T == EXP_CNT) {
+ int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
+ if (ScoreLBs[T] < UB)
+ ScoreLBs[T] = UB;
+ }
+ };
+
+ int32_t getScoreLB(InstCounterType T) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return 0;
+ return ScoreLBs[T];
+ };
+
+ int32_t getScoreUB(InstCounterType T) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return 0;
+ return ScoreUBs[T];
+ };
+
+ // Mapping from event to counter.
+ InstCounterType eventCounter(WaitEventType E) {
+ switch (E) {
+ case VMEM_ACCESS:
+ return VM_CNT;
+ case LDS_ACCESS:
+ case GDS_ACCESS:
+ case SQ_MESSAGE:
+ case SMEM_ACCESS:
+ return LGKM_CNT;
+ case EXP_GPR_LOCK:
+ case GDS_GPR_LOCK:
+ case VMW_GPR_LOCK:
+ case EXP_POS_ACCESS:
+ case EXP_PARAM_ACCESS:
+ return EXP_CNT;
+ default:
+ llvm_unreachable("unhandled event type");
+ }
+ return NUM_INST_CNTS;
+ }
+
+ void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
+ if (GprNo < NUM_ALL_VGPRS) {
+ if (GprNo > VgprUB) {
+ VgprUB = GprNo;
+ }
+ VgprScores[T][GprNo] = Val;
+ } else {
+ assert(T == LGKM_CNT);
+ if (GprNo - NUM_ALL_VGPRS > SgprUB) {
+ SgprUB = GprNo - NUM_ALL_VGPRS;
+ }
+ SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
+ }
+ }
+
+ int32_t getRegScore(int GprNo, InstCounterType T) {
+ if (GprNo < NUM_ALL_VGPRS) {
+ return VgprScores[T][GprNo];
+ }
+ return SgprScores[GprNo - NUM_ALL_VGPRS];
+ }
+
+ void clear() {
+ memset(ScoreLBs, 0, sizeof(ScoreLBs));
+ memset(ScoreUBs, 0, sizeof(ScoreUBs));
+ memset(EventUBs, 0, sizeof(EventUBs));
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ memset(VgprScores[T], 0, sizeof(VgprScores[T]));
+ }
+ memset(SgprScores, 0, sizeof(SgprScores));
+ }
+
+ RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII,
+ const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI, unsigned OpNo,
+ bool Def) const;
+
+ void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
+ unsigned OpNo, int32_t Val);
+
+ void setWaitAtBeginning() { WaitAtBeginning = true; }
+ void clearWaitAtBeginning() { WaitAtBeginning = false; }
+ bool getWaitAtBeginning() const { return WaitAtBeginning; }
+ void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; }
+ int32_t getMaxVGPR() const { return VgprUB; }
+ int32_t getMaxSGPR() const { return SgprUB; }
+ int32_t getEventUB(enum WaitEventType W) const {
+ assert(W < NUM_WAIT_EVENTS);
+ return EventUBs[W];
+ }
+ bool counterOutOfOrder(InstCounterType T);
+ unsigned int updateByWait(InstCounterType T, int ScoreToWait);
+ void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI, WaitEventType E,
+ MachineInstr &MI);
+
+ BlockWaitcntBrackets()
+ : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false),
+ LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ memset(VgprScores[T], 0, sizeof(VgprScores[T]));
+ }
+ }
+ ~BlockWaitcntBrackets(){};
+
+ bool hasPendingSMEM() const {
+ return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]);
+ }
+
+ bool hasPendingFlat() const {
+ return ((LastFlat[LGKM_CNT] > ScoreLBs[LGKM_CNT] &&
+ LastFlat[LGKM_CNT] <= ScoreUBs[LGKM_CNT]) ||
+ (LastFlat[VM_CNT] > ScoreLBs[VM_CNT] &&
+ LastFlat[VM_CNT] <= ScoreUBs[VM_CNT]));
+ }
+
+ void setPendingFlat() {
+ LastFlat[VM_CNT] = ScoreUBs[VM_CNT];
+ LastFlat[LGKM_CNT] = ScoreUBs[LGKM_CNT];
+ }
+
+ int pendingFlat(InstCounterType Ct) const { return LastFlat[Ct]; }
+
+ void setLastFlat(InstCounterType Ct, int Val) { LastFlat[Ct] = Val; }
+
+ bool getRevisitLoop() const { return RevisitLoop; }
+ void setRevisitLoop(bool RevisitLoopIn) { RevisitLoop = RevisitLoopIn; }
+
+ void setPostOrder(int32_t PostOrderIn) { PostOrder = PostOrderIn; }
+ int32_t getPostOrder() const { return PostOrder; }
+
+ void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; }
+ void clearWaitcnt() { Waitcnt = NULL; }
+ MachineInstr *getWaitcnt() const { return Waitcnt; }
+
+ bool mixedExpTypes() const { return MixedExpTypes; }
+ void setMixedExpTypes(bool MixedExpTypesIn) {
+ MixedExpTypes = MixedExpTypesIn;
+ }
+
+ void print(raw_ostream &);
+ void dump() { print(dbgs()); }
+
+private:
+ bool WaitAtBeginning;
+ bool RevisitLoop;
+ bool ValidLoop;
+ bool MixedExpTypes;
+ MachineLoop *LoopRegion;
+ int32_t PostOrder;
+ MachineInstr *Waitcnt;
+ int32_t ScoreLBs[NUM_INST_CNTS] = {0};
+ int32_t ScoreUBs[NUM_INST_CNTS] = {0};
+ int32_t EventUBs[NUM_WAIT_EVENTS] = {0};
+ // Remember the last flat memory operation.
+ int32_t LastFlat[NUM_INST_CNTS] = {0};
+ // wait_cnt scores for every vgpr.
+ // Keep track of the VgprUB and SgprUB to make merge at join efficient.
+ int32_t VgprUB;
+ int32_t SgprUB;
+ int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+ // Wait cnt scores for every sgpr, only lgkmcnt is relevant.
+ int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
+};
+
+// This is a per-loop-region object that records the waitcnt status at the end
+// of the loop footer from the previous iteration. We also maintain an iteration
+// count to track the number of times the loop has been visited. When it
+// doesn't converge naturally, we force convergence by inserting s_waitcnt 0
+// at the end of the loop footer.
+class LoopWaitcntData {
+public:
+ void incIterCnt() { IterCnt++; }
+ void resetIterCnt() { IterCnt = 0; }
+ int32_t getIterCnt() { return IterCnt; }
+
+ LoopWaitcntData() : LfWaitcnt(NULL), IterCnt(0) {}
+ ~LoopWaitcntData(){};
+
+ void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; }
+ MachineInstr *getWaitcnt() const { return LfWaitcnt; }
+
+ void print() {
+ DEBUG(dbgs() << " iteration " << IterCnt << '\n';);
+ return;
+ }
+
+private:
+  // s_waitcnt added at the end of the loop footer to stabilize the wait scores
+  // at the end of the loop footer.
+ MachineInstr *LfWaitcnt;
+  // Number of times the loop has been visited, not including the initial
+ // walk over.
+ int32_t IterCnt;
+};
+
+class SIInsertWaitcnts : public MachineFunctionPass {
+
+private:
+ const SISubtarget *ST;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const MachineLoopInfo *MLI;
+ AMDGPU::IsaInfo::IsaVersion IV;
+ AMDGPUAS AMDGPUASI;
+
+ DenseSet<MachineBasicBlock *> BlockVisitedSet;
+ DenseSet<MachineInstr *> CompilerGeneratedWaitcntSet;
+ DenseSet<MachineInstr *> VCCZBugHandledSet;
+
+ DenseMap<MachineBasicBlock *, std::unique_ptr<BlockWaitcntBrackets>>
+ BlockWaitcntBracketsMap;
+
+ DenseSet<MachineBasicBlock *> BlockWaitcntProcessedSet;
+
+ DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
+
+ std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
+
+public:
+ static char ID;
+
+ SIInsertWaitcnts()
+ : MachineFunctionPass(ID), ST(nullptr), TII(nullptr), TRI(nullptr),
+ MRI(nullptr), MLI(nullptr) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "SI insert wait instructions";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
+ // The waitcnt information is copied because it changes as the block is
+ // traversed.
+ KillWaitBrackets.push_back(make_unique<BlockWaitcntBrackets>(*Bracket));
+ }
+
+ MachineInstr *generateSWaitCntInstBefore(MachineInstr &MI,
+ BlockWaitcntBrackets *ScoreBrackets);
+ void updateEventWaitCntAfter(MachineInstr &Inst,
+ BlockWaitcntBrackets *ScoreBrackets);
+ void mergeInputScoreBrackets(MachineBasicBlock &Block);
+ MachineBasicBlock *loopBottom(const MachineLoop *Loop);
+ void insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block);
+ void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst);
+};
+
+} // End anonymous namespace.
+
+RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI,
+ unsigned OpNo,
+ bool Def) const {
+ const MachineOperand &Op = MI->getOperand(OpNo);
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
+ (Def && !Op.isDef()))
+ return {-1, -1};
+
+ // A use via a PW operand does not need a waitcnt.
+ // A partial write is not a WAW.
+ assert(!Op.getSubReg() || !Op.isUndef());
+
+ RegInterval Result;
+ const MachineRegisterInfo &MRIA = *MRI;
+
+ unsigned Reg = TRI->getEncodingValue(Op.getReg());
+
+ if (TRI->isVGPR(MRIA, Op.getReg())) {
+ assert(Reg >= RegisterEncoding.VGPR0 && Reg <= RegisterEncoding.VGPRL);
+ Result.first = Reg - RegisterEncoding.VGPR0;
+ assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
+ } else if (TRI->isSGPRReg(MRIA, Op.getReg())) {
+ assert(Reg >= RegisterEncoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS);
+ Result.first = Reg - RegisterEncoding.SGPR0 + NUM_ALL_VGPRS;
+ assert(Result.first >= NUM_ALL_VGPRS &&
+ Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
+ }
+ // TODO: Handle TTMP
+ // else if (TRI->isTTMP(MRIA, Reg.getReg())) ...
+ else
+ return {-1, -1};
+
+ const MachineInstr &MIA = *MI;
+ const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo);
+ unsigned Size = RC->getSize();
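+  // The interval is half-open in units of 32-bit registers, e.g. a use of a
+  // 128-bit VGPR tuple starting at v4 yields [4, 8), covering v4..v7.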
+ Result.second = Result.first + (Size / 4);
+
+ return Result;
+}
+
+void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI,
+ unsigned OpNo, int32_t Val) {
+ RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
+ DEBUG({
+ const MachineOperand &Opnd = MI->getOperand(OpNo);
+ assert(TRI->isVGPR(*MRI, Opnd.getReg()));
+ });
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ setRegScore(RegNo, EXP_CNT, Val);
+ }
+}
+
+void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI,
+ WaitEventType E, MachineInstr &Inst) {
+ const MachineRegisterInfo &MRIA = *MRI;
+ InstCounterType T = eventCounter(E);
+ int32_t CurrScore = getScoreUB(T) + 1;
+  // EventUB and ScoreUB need to be updated regardless of whether this event
+  // changes the score of a register or not.
+  // Examples include vm_cnt for a buffer store or lgkm_cnt for a send-message.
+ EventUBs[E] = CurrScore;
+ setScoreUB(T, CurrScore);
+
+ if (T == EXP_CNT) {
+ // Check for mixed export types. If they are mixed, then a waitcnt exp(0)
+ // is required.
+ if (!MixedExpTypes) {
+ MixedExpTypes = counterOutOfOrder(EXP_CNT);
+ }
+
+ // Put score on the source vgprs. If this is a store, just use those
+ // specific register(s).
+ if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
+      // All GDS operations must protect their address register (same as
+      // export).
+ if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
+ Inst.getOpcode() != AMDGPU::DS_CONSUME) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
+ CurrScore);
+ }
+ if (Inst.mayStore()) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
+ CurrScore);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::data1) != -1) {
+ setExpScore(&Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::data1),
+ CurrScore);
+ }
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1 &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_P &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_BARRIER &&
+ Inst.getOpcode() != AMDGPU::DS_APPEND &&
+ Inst.getOpcode() != AMDGPU::DS_CONSUME &&
+ Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ const MachineOperand &Op = Inst.getOperand(I);
+ if (Op.isReg() && !Op.isDef() && TRI->isVGPR(MRIA, Op.getReg())) {
+ setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
+ }
+ }
+ }
+ } else if (TII->isFLAT(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ }
+ } else if (TII->isMIMG(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ }
+ } else if (TII->isMTBUF(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+ }
+ } else if (TII->isMUBUF(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ }
+ } else {
+ if (TII->isEXP(Inst)) {
+ // For export the destination registers are really temps that
+ // can be used as the actual source after export patching, so
+ // we need to treat them like sources and set the EXP_CNT
+ // score.
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ MachineOperand &DefMO = Inst.getOperand(I);
+ if (DefMO.isReg() && DefMO.isDef() &&
+ TRI->isVGPR(MRIA, DefMO.getReg())) {
+ setRegScore(TRI->getEncodingValue(DefMO.getReg()), EXP_CNT,
+ CurrScore);
+ }
+ }
+ }
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = Inst.getOperand(I);
+ if (MO.isReg() && !MO.isDef() && TRI->isVGPR(MRIA, MO.getReg())) {
+ setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
+ }
+ }
+ }
+#if 0 // TODO: check if this is handled by MUBUF code above.
+ } else if (Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORD ||
+ Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX2 ||
+ Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) {
+ MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data);
+ unsigned OpNo;//TODO: find the OpNo for this operand;
+ RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo, false);
+ for (signed RegNo = Interval.first; RegNo < Interval.second;
+ ++RegNo) {
+ setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore);
+ }
+#endif
+ } else {
+ // Match the score to the destination registers.
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I, true);
+ if (T == VM_CNT && Interval.first >= NUM_ALL_VGPRS)
+ continue;
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ setRegScore(RegNo, T, CurrScore);
+ }
+ }
+ if (TII->isDS(Inst) && Inst.mayStore()) {
+ setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, CurrScore);
+ }
+ }
+}
+
+void BlockWaitcntBrackets::print(raw_ostream &OS) {
+ OS << '\n';
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int LB = getScoreLB(T);
+ int UB = getScoreUB(T);
+
+ switch (T) {
+ case VM_CNT:
+ OS << " VM_CNT(" << UB - LB << "): ";
+ break;
+ case LGKM_CNT:
+ OS << " LGKM_CNT(" << UB - LB << "): ";
+ break;
+ case EXP_CNT:
+ OS << " EXP_CNT(" << UB - LB << "): ";
+ break;
+ default:
+ OS << " UNKNOWN(" << UB - LB << "): ";
+ break;
+ }
+
+ if (LB < UB) {
+ // Print vgpr scores.
+ for (int J = 0; J <= getMaxVGPR(); J++) {
+ int RegScore = getRegScore(J, T);
+ if (RegScore <= LB)
+ continue;
+ int RelScore = RegScore - LB - 1;
+ if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
+ OS << RelScore << ":v" << J << " ";
+ } else {
+ OS << RelScore << ":ds ";
+ }
+ }
+ // Also need to print sgpr scores for lgkm_cnt.
+ if (T == LGKM_CNT) {
+ for (int J = 0; J <= getMaxSGPR(); J++) {
+ int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ if (RegScore <= LB)
+ continue;
+ int RelScore = RegScore - LB - 1;
+ OS << RelScore << ":s" << J << " ";
+ }
+ }
+ }
+ OS << '\n';
+ }
+ OS << '\n';
+ return;
+}
+
+unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
+ int ScoreToWait) {
+ unsigned int NeedWait = 0;
+ if (ScoreToWait == -1) {
+    // The score to wait on is unknown. This implies that it was not
+    // encountered on the path walked during the current CFG traversal, but
+    // may be seen on a different path. Emit an s_waitcnt with a conservative
+    // value of 0 for the counter.
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, getScoreUB(T));
+ return NeedWait;
+ }
+
+ // If the score of src_operand falls within the bracket, we need an
+ // s_waitcnt instruction.
+ const int32_t LB = getScoreLB(T);
+ const int32_t UB = getScoreUB(T);
+ if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
+ if (T == VM_CNT && hasPendingFlat()) {
+ // If there is a pending FLAT operation, and this is a VM waitcnt,
+ // then we need to force a waitcnt 0 for VM.
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, getScoreUB(T));
+ } else if (counterOutOfOrder(T)) {
+      // The counter can be decremented out-of-order when there are
+      // multiple event types in the bracket. Also emit an s_wait counter
+      // with a conservative value of 0 for the counter.
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, getScoreUB(T));
+ } else {
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, ScoreToWait);
+ }
+ }
+
+ return NeedWait;
+}
+
+// Where there are multiple types of event in the bracket of a counter,
+// the decrement may go out of order.
+bool BlockWaitcntBrackets::counterOutOfOrder(InstCounterType T) {
+ switch (T) {
+ case VM_CNT:
+ return false;
+ case LGKM_CNT: {
+ if (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]) {
+      // Scalar memory reads can always go out of order.
+ return true;
+ }
+ int NumEventTypes = 0;
+ if (EventUBs[LDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[LDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[GDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[GDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[SQ_MESSAGE] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[SQ_MESSAGE] <= ScoreUBs[LGKM_CNT]) {
+ NumEventTypes++;
+ }
+ if (NumEventTypes <= 1) {
+ return false;
+ }
+ break;
+ }
+ case EXP_CNT: {
+ // If there has been a mixture of export types, then a waitcnt exp(0) is
+ // required.
+ if (MixedExpTypes)
+ return true;
+ int NumEventTypes = 0;
+ if (EventUBs[EXP_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
+ EventUBs[EXP_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[GDS_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
+ EventUBs[GDS_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[VMW_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
+ EventUBs[VMW_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[EXP_PARAM_ACCESS] > ScoreLBs[EXP_CNT] &&
+ EventUBs[EXP_PARAM_ACCESS] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+
+ if (EventUBs[EXP_POS_ACCESS] > ScoreLBs[EXP_CNT] &&
+ EventUBs[EXP_POS_ACCESS] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+
+ if (NumEventTypes <= 1) {
+ return false;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return true;
+}
+
+INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
+ false)
+INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
+ false)
+
+char SIInsertWaitcnts::ID = 0;
+
+char &llvm::SIInsertWaitcntsID = SIInsertWaitcnts::ID;
+
+FunctionPass *llvm::createSIInsertWaitcntsPass() {
+ return new SIInsertWaitcnts();
+}
+
+static bool readsVCCZ(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
+ !MI.getOperand(1).isUndef();
+}
+
+/// \brief Generate an s_waitcnt instruction to be placed before cur_Inst.
+/// Instructions of a given type are returned in order, but instructions of
+/// different types can complete out of order. We rely on this in-order
+/// completion and simply assign a score to the memory access instructions.
+/// We keep track of the active "score bracket" to determine if a memory
+/// access requires an s_waitcnt, and if so what the value of each counter
+/// is. The "score bracket" is bounded by the lower-bound and upper-bound
+/// scores (*_score_LB and *_score_UB respectively).
+MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
+ MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets) {
+ // To emit, or not to emit - that's the question!
+ // Start with an assumption that there is no need to emit.
+ unsigned int EmitSwaitcnt = 0;
+ // s_waitcnt instruction to return; default is NULL.
+ MachineInstr *SWaitInst = nullptr;
+  // No need to wait before a phi. If a phi-move exists, then the wait should
+  // have been inserted before the move. If a phi-move does not exist, then
+  // the wait should be inserted before the real use. The same is true for
+  // sc-merge. It is not a coincidence that all these cases correspond to the
+  // instructions that are skipped in the assembling loop.
+ bool NeedLineMapping = false; // TODO: Check on this.
+ if (MI.isDebugValue() &&
+ // TODO: any other opcode?
+ !NeedLineMapping) {
+ return SWaitInst;
+ }
+
+ // See if an s_waitcnt is forced at block entry, or is needed at
+ // program end.
+ if (ScoreBrackets->getWaitAtBeginning()) {
+ // Note that we have already cleared the state, so we don't need to update
+ // it.
+ ScoreBrackets->clearWaitAtBeginning();
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ EmitSwaitcnt |= CNT_MASK(T);
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ }
+ }
+
+ // See if this instruction has a forced S_WAITCNT VM.
+ // TODO: Handle other cases of NeedsWaitcntVmBefore()
+ else if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
+ EmitSwaitcnt |=
+ ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ }
+
+ // All waits must be resolved at call return.
+ // NOTE: this could be improved with knowledge of all call sites or
+ // with knowledge of the called routines.
+ if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ EmitSwaitcnt |= CNT_MASK(T);
+ }
+ }
+ }
+ // Resolve vm waits before gs-done.
+ else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
+ MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+ ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_) ==
+ AMDGPU::SendMsg::ID_GS_DONE)) {
+ if (ScoreBrackets->getScoreUB(VM_CNT) > ScoreBrackets->getScoreLB(VM_CNT)) {
+ ScoreBrackets->setScoreLB(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ EmitSwaitcnt |= CNT_MASK(VM_CNT);
+ }
+ }
+#if 0 // TODO: the following blocks of logic when we have fence.
+ else if (MI.getOpcode() == SC_FENCE) {
+ const unsigned int group_size =
+ context->shader_info->GetMaxThreadGroupSize();
+ // group_size == 0 means thread group size is unknown at compile time
+ const bool group_is_multi_wave =
+ (group_size == 0 || group_size > target_info->GetWaveFrontSize());
+ const bool fence_is_global = !((SCInstInternalMisc*)Inst)->IsGroupFence();
+
+ for (unsigned int i = 0; i < Inst->NumSrcOperands(); i++) {
+ SCRegType src_type = Inst->GetSrcType(i);
+ switch (src_type) {
+ case SCMEM_LDS:
+ if (group_is_multi_wave ||
+ context->OptFlagIsOn(OPT_R1100_LDSMEM_FENCE_CHICKEN_BIT)) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT,
+ ScoreBrackets->getScoreUB(LGKM_CNT));
+ // LDS may have to wait for VM_CNT after buffer load to LDS
+ if (target_info->HasBufferLoadToLDS()) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT,
+ ScoreBrackets->getScoreUB(VM_CNT));
+ }
+ }
+ break;
+
+ case SCMEM_GDS:
+ if (group_is_multi_wave || fence_is_global) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT,
+ ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT,
+ ScoreBrackets->getScoreUB(LGKM_CNT));
+ }
+ break;
+
+ case SCMEM_UAV:
+ case SCMEM_TFBUF:
+ case SCMEM_RING:
+ case SCMEM_SCATTER:
+ if (group_is_multi_wave || fence_is_global) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT,
+ ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT,
+ ScoreBrackets->getScoreUB(VM_CNT));
+ }
+ break;
+
+ case SCMEM_SCRATCH:
+ default:
+ break;
+ }
+ }
+ }
+#endif
+
+ // Export & GDS instructions do not read the EXEC mask until after the export
+ // is granted (which can occur well after the instruction is issued).
+  // The shader program must wait for all outstanding EXP operations (tracked
+  // by the export count) before overwriting the EXEC mask.
+ else {
+ if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+ // Export and GDS are tracked individually, either may trigger a waitcnt
+ // for EXEC.
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(EXP_GPR_LOCK));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(EXP_PARAM_ACCESS));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(EXP_POS_ACCESS));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(GDS_GPR_LOCK));
+ }
+
+#if 0 // TODO: the following code to handle CALL.
+ // The argument passing for CALLs should suffice for VM_CNT and LGKM_CNT.
+ // However, there is a problem with EXP_CNT, because the call cannot
+ // easily tell if a register is used in the function, and if it did, then
+ // the referring instruction would have to have an S_WAITCNT, which is
+  // dependent on all call sites. So instead, force S_WAITCNT for EXP_CNTs
+ // before the call.
+ if (MI.getOpcode() == SC_CALL) {
+ if (ScoreBrackets->getScoreUB(EXP_CNT) >
+ ScoreBrackets->getScoreLB(EXP_CNT)) {
+ ScoreBrackets->setScoreLB(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= CNT_MASK(EXP_CNT);
+ }
+ }
+#endif
+
+ // Look at the source operands of every instruction to see if
+ // any of them results from a previous memory operation that affects
+ // its current usage. If so, an s_waitcnt instruction needs to be
+ // emitted.
+ // If the source operand was defined by a load, add the s_waitcnt
+ // instruction.
+ for (const MachineMemOperand *Memop : MI.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ continue;
+ unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
+ // VM_CNT is only relevant to vgpr or LDS.
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ }
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &Op = MI.getOperand(I);
+ const MachineRegisterInfo &MRIA = *MRI;
+ RegInterval Interval =
+ ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, false);
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ if (TRI->isVGPR(MRIA, Op.getReg())) {
+ // VM_CNT is only relevant to vgpr or LDS.
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ }
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ }
+ }
+ // End of for loop that looks at all source operands to decide vm_wait_cnt
+ // and lgk_wait_cnt.
+
+ // Two cases are handled for destination operands:
+ // 1) If the destination operand was defined by a load, add the s_waitcnt
+ // instruction to guarantee the right WAW order.
+    // 2) If a destination operand was used by a recent export/store
+    // instruction, add an s_waitcnt on exp_cnt to guarantee the WAR order.
+ if (MI.mayStore()) {
+ for (const MachineMemOperand *Memop : MI.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ continue;
+ unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ }
+ }
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ MachineOperand &Def = MI.getOperand(I);
+ const MachineRegisterInfo &MRIA = *MRI;
+ RegInterval Interval =
+ ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, true);
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ if (TRI->isVGPR(MRIA, Def.getReg())) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ }
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ }
+ } // End of for loop that looks at all dest operands.
+ }
+
+ // TODO: Tie force zero to a compiler triage option.
+ bool ForceZero = false;
+
+ // Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
+ // occurs before the instruction. Doing it here prevents any additional
+ // S_WAITCNTs from being emitted if the instruction was marked as
+ // requiring a WAITCNT beforehand.
+ if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) {
+ EmitSwaitcnt |=
+ ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT));
+ }
+
+ // TODO: Remove this work-around, enable the assert for Bug 457939
+ // after fixing the scheduler. Also, the Shader Compiler code is
+ // independent of target.
+ if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
+ if (ScoreBrackets->getScoreLB(LGKM_CNT) <
+ ScoreBrackets->getScoreUB(LGKM_CNT) &&
+ ScoreBrackets->hasPendingSMEM()) {
+ // Wait on everything, not just LGKM. vccz reads usually come from
+ // terminators, and we always wait on everything at the end of the
+ // block, so if we only wait on LGKM here, we might end up with
+ // another s_waitcnt inserted right after this if there are non-LGKM
+ // instructions still outstanding.
+ ForceZero = true;
+ EmitSwaitcnt = true;
+ }
+ }
+
+ // Does this operand processing indicate s_wait counter update?
+ if (EmitSwaitcnt) {
+ int CntVal[NUM_INST_CNTS];
+
+ bool UseDefaultWaitcntStrategy = true;
+ if (ForceZero) {
+ // Force all waitcnts to 0.
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ }
+ CntVal[VM_CNT] = 0;
+ CntVal[EXP_CNT] = 0;
+ CntVal[LGKM_CNT] = 0;
+ UseDefaultWaitcntStrategy = false;
+ }
+
+ if (UseDefaultWaitcntStrategy) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ if (EmitSwaitcnt & CNT_MASK(T)) {
+ int Delta =
+ ScoreBrackets->getScoreUB(T) - ScoreBrackets->getScoreLB(T);
+ int MaxDelta = ScoreBrackets->getWaitCountMax(T);
+ if (Delta >= MaxDelta) {
+ Delta = -1;
+ if (T != EXP_CNT) {
+ ScoreBrackets->setScoreLB(
+ T, ScoreBrackets->getScoreUB(T) - MaxDelta);
+ }
+ EmitSwaitcnt &= ~CNT_MASK(T);
+ }
+ CntVal[T] = Delta;
+ } else {
+ // If we are not waiting for a particular counter then encode
+ // it as -1 which means "don't care."
+ CntVal[T] = -1;
+ }
+ }
+ }
+
+ // If we are not waiting on any counter we can skip the wait altogether.
+ if (EmitSwaitcnt != 0) {
+ MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
+ int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
+ if (!OldWaitcnt || (AMDGPU::decodeVmcnt(IV, Imm) !=
+ (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
+ (AMDGPU::decodeExpcnt(IV, Imm) !=
+ (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
+ (AMDGPU::decodeLgkmcnt(IV, Imm) !=
+ (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
+ MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
+ if (ContainingLoop) {
+ MachineBasicBlock *TBB = ContainingLoop->getTopBlock();
+ BlockWaitcntBrackets *ScoreBracket =
+ BlockWaitcntBracketsMap[TBB].get();
+ if (!ScoreBracket) {
+ assert(BlockVisitedSet.find(TBB) == BlockVisitedSet.end());
+ BlockWaitcntBracketsMap[TBB] = make_unique<BlockWaitcntBrackets>();
+ ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
+ }
+ ScoreBracket->setRevisitLoop(true);
+ DEBUG(dbgs() << "set-revisit: block"
+ << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+ }
+ }
+
+ // Update an existing waitcount, or make a new one.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ if (OldWaitcnt && OldWaitcnt->getOpcode() != AMDGPU::S_WAITCNT) {
+ SWaitInst = OldWaitcnt;
+ } else {
+ SWaitInst = MF.CreateMachineInstr(TII->get(AMDGPU::S_WAITCNT),
+ MI.getDebugLoc());
+ CompilerGeneratedWaitcntSet.insert(SWaitInst);
+ }
+
+ const MachineOperand &Op =
+ MachineOperand::CreateImm(AMDGPU::encodeWaitcnt(
+ IV, CntVal[VM_CNT], CntVal[EXP_CNT], CntVal[LGKM_CNT]));
+ SWaitInst->addOperand(MF, Op);
+
+ if (CntVal[EXP_CNT] == 0) {
+ ScoreBrackets->setMixedExpTypes(false);
+ }
+ }
+ }
+
+ return SWaitInst;
+}
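Once the per-counter values in CntVal[] are settled, they get packed into the single immediate operand of the S_WAITCNT instruction, with -1 ("don't care") effectively saturating to the field's all-ones value once masked to the field width. The sketch below (not part of this patch) shows that packing with made-up field positions and widths; the real, ISA-version-dependent layout is what AMDGPU::encodeWaitcnt and the decode/getBitMask helpers abstract away:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Toy encoder for an s_waitcnt-style immediate. The field layout assumed
// here (vm: 4 bits at 0, exp: 3 bits at 4, lgkm: 4 bits at 8) is an
// invention for the example, not the actual encoding of any GPU generation.
static uint32_t encodeWaitcntToy(unsigned Vm, unsigned Exp, unsigned Lgkm) {
  return (Vm & 0xf) | ((Exp & 0x7) << 4) | ((Lgkm & 0xf) << 8);
}

int main() {
  // Passing ~0u (i.e. -1, "don't care") saturates the field to all ones,
  // which means "wait for nothing" on that counter.
  uint32_t Imm = encodeWaitcntToy(0, ~0u, ~0u); // wait only on vmcnt(0)
  assert((Imm & 0xf) == 0);
  assert(((Imm >> 4) & 0x7) == 0x7);
  assert(((Imm >> 8) & 0xf) == 0xf);
  std::printf("imm = 0x%x\n", (unsigned)Imm); // prints 0xf70
}
```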
+
+void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB,
+ MachineInstr *Waitcnt) {
+ if (MBB.empty()) {
+ MBB.push_back(Waitcnt);
+ return;
+ }
+
+ MachineBasicBlock::iterator It = MBB.end();
+ MachineInstr *MI = &*(--It);
+ if (MI->isBranch()) {
+ MBB.insert(It, Waitcnt);
+ } else {
+ MBB.push_back(Waitcnt);
+ }
+
+ return;
+}
+
+void SIInsertWaitcnts::updateEventWaitCntAfter(
+ MachineInstr &Inst, BlockWaitcntBrackets *ScoreBrackets) {
+ // Now look at the instruction opcode. If it is a memory access
+ // instruction, update the upper-bound of the appropriate counter's
+ // bracket and the destination operand scores.
+ // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
+ if (TII->isDS(Inst) && (Inst.mayLoad() || Inst.mayStore())) {
+ if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
+ } else {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
+ }
+ } else if (TII->isFLAT(Inst)) {
+ assert(Inst.mayLoad() || Inst.mayStore());
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
+
+ // This is a flat memory operation. Check to see if it has memory
+    // tokens for both LDS and Memory, and if so mark it as a flat access.
+ bool FoundLDSMem = false;
+ for (const MachineMemOperand *Memop : Inst.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS)
+ FoundLDSMem = true;
+ }
+
+ // This is a flat memory operation, so note it - it will require
+ // that both the VM and LGKM be flushed to zero if it is pending when
+ // a VM or LGKM dependency occurs.
+ if (FoundLDSMem) {
+ ScoreBrackets->setPendingFlat();
+ }
+ } else if (SIInstrInfo::isVMEM(Inst) &&
+ // TODO: get a better carve out.
+ Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1 &&
+ Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC &&
+ Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() &&
+ (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()))) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
+ }
+ } else if (TII->isSMRD(Inst)) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+ } else {
+ switch (Inst.getOpcode()) {
+ case AMDGPU::S_SENDMSG:
+ case AMDGPU::S_SENDMSGHALT:
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst);
+ break;
+ case AMDGPU::EXP:
+ case AMDGPU::EXP_DONE: {
+ int Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
+ if (Imm >= 32 && Imm <= 63)
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
+ else if (Imm >= 12 && Imm <= 15)
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
+ else
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
+ break;
+ }
+ case AMDGPU::S_MEMTIME:
+ case AMDGPU::S_MEMREALTIME:
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
+ BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
+ int32_t MaxPending[NUM_INST_CNTS] = {0};
+ int32_t MaxFlat[NUM_INST_CNTS] = {0};
+ bool MixedExpTypes = false;
+
+ // Clear the score bracket state.
+ ScoreBrackets->clear();
+
+ // Compute the number of pending elements on block entry.
+
+ // IMPORTANT NOTE: If iterative handling of loops is added, the code will
+ // need to handle single BBs with backedges to themselves. This means that
+ // they will need to retain and not clear their initial state.
+
+ // See if there are any uninitialized predecessors. If so, emit an
+ // s_waitcnt 0 at the beginning of the block.
+ for (MachineBasicBlock *pred : Block.predecessors()) {
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[pred].get();
+ bool Visited = BlockVisitedSet.find(pred) != BlockVisitedSet.end();
+ if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
+ break;
+ }
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int span =
+ PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
+ MaxPending[T] = std::max(MaxPending[T], span);
+ span =
+ PredScoreBrackets->pendingFlat(T) - PredScoreBrackets->getScoreLB(T);
+ MaxFlat[T] = std::max(MaxFlat[T], span);
+ }
+
+ MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
+ }
+
+ // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
+ // Also handle kills for exit block.
+ if (Block.succ_empty() && !KillWaitBrackets.empty()) {
+ for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int Span = KillWaitBrackets[I]->getScoreUB(T) -
+ KillWaitBrackets[I]->getScoreLB(T);
+ MaxPending[T] = std::max(MaxPending[T], Span);
+ Span = KillWaitBrackets[I]->pendingFlat(T) -
+ KillWaitBrackets[I]->getScoreLB(T);
+ MaxFlat[T] = std::max(MaxFlat[T], Span);
+ }
+
+ MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
+ }
+ }
+
+ // Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
+ for (MachineBasicBlock *Pred : Block.predecessors()) {
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[Pred].get();
+ bool Visited = BlockVisitedSet.find(Pred) != BlockVisitedSet.end();
+ if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
+ break;
+ }
+
+ int GDSSpan = PredScoreBrackets->getEventUB(GDS_GPR_LOCK) -
+ PredScoreBrackets->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
+ int EXPSpan = PredScoreBrackets->getEventUB(EXP_GPR_LOCK) -
+ PredScoreBrackets->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
+ }
+
+ // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
+ if (Block.succ_empty() && !KillWaitBrackets.empty()) {
+ for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
+ int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
+ KillWaitBrackets[I]->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
+ int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
+ KillWaitBrackets[I]->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
+ }
+ }
+
+#if 0
+  // LC does not (unlike SC) add a waitcnt at the beginning. Leaving it as a marker.
+ // TODO: how does LC distinguish between function entry and main entry?
+ // If this is the entry to a function, force a wait.
+ MachineBasicBlock &Entry = Block.getParent()->front();
+ if (Entry.getNumber() == Block.getNumber()) {
+ ScoreBrackets->setWaitAtBeginning();
+ return;
+ }
+#endif
+
+ // Now set the current Block's brackets to the largest ending bracket.
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ ScoreBrackets->setScoreUB(T, MaxPending[T]);
+ ScoreBrackets->setScoreLB(T, 0);
+ ScoreBrackets->setLastFlat(T, MaxFlat[T]);
+ }
+
+ ScoreBrackets->setMixedExpTypes(MixedExpTypes);
+
+ // Set the register scoreboard.
+ for (MachineBasicBlock *Pred : Block.predecessors()) {
+ if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) {
+ break;
+ }
+
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[Pred].get();
+
+ // Now merge the gpr_reg_score information
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int PredLB = PredScoreBrackets->getScoreLB(T);
+ int PredUB = PredScoreBrackets->getScoreUB(T);
+ if (PredLB < PredUB) {
+ int PredScale = MaxPending[T] - PredUB;
+ // Merge vgpr scores.
+ for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
+ int PredRegScore = PredScoreBrackets->getRegScore(J, T);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
+ }
+ // Also need to merge sgpr scores for lgkm_cnt.
+ if (T == LGKM_CNT) {
+ for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
+ int PredRegScore =
+ PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J + NUM_ALL_VGPRS, LGKM_CNT,
+ std::max(
+ ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
+ NewRegScore));
+ }
+ }
+ }
+ }
+
+ // Also merge the WaitEvent information.
+ ForAllWaitEventType(W) {
+ enum InstCounterType T = PredScoreBrackets->eventCounter(W);
+ int PredEventUB = PredScoreBrackets->getEventUB(W);
+ if (PredEventUB > PredScoreBrackets->getScoreLB(T)) {
+ int NewEventUB =
+ MaxPending[T] + PredEventUB - PredScoreBrackets->getScoreUB(T);
+ if (NewEventUB > 0) {
+ ScoreBrackets->setEventUB(
+ W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
+ }
+ }
+ }
+ }
+
+ // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
+ // Set the register scoreboard.
+ if (Block.succ_empty() && !KillWaitBrackets.empty()) {
+ for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
+ // Now merge the gpr_reg_score information.
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int PredLB = KillWaitBrackets[I]->getScoreLB(T);
+ int PredUB = KillWaitBrackets[I]->getScoreUB(T);
+ if (PredLB < PredUB) {
+ int PredScale = MaxPending[T] - PredUB;
+ // Merge vgpr scores.
+ for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
+ int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
+ }
+ // Also need to merge sgpr scores for lgkm_cnt.
+ if (T == LGKM_CNT) {
+ for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
+ int PredRegScore =
+ KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J + NUM_ALL_VGPRS, LGKM_CNT,
+ std::max(
+ ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
+ NewRegScore));
+ }
+ }
+ }
+ }
+
+ // Also merge the WaitEvent information.
+ ForAllWaitEventType(W) {
+ enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
+ int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
+ if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
+ int NewEventUB =
+ MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
+ if (NewEventUB > 0) {
+ ScoreBrackets->setEventUB(
+ W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
+ }
+ }
+ }
+ }
+ }
+
+ // Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
+ // sequencing predecessors, because changes to EXEC require waitcnts due to
+ // the delayed nature of these operations.
+ for (MachineBasicBlock *Pred : Block.predecessors()) {
+ if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) {
+ break;
+ }
+
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[Pred].get();
+
+ int pred_gds_ub = PredScoreBrackets->getEventUB(GDS_GPR_LOCK);
+ if (pred_gds_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
+ int new_gds_ub = MaxPending[EXP_CNT] + pred_gds_ub -
+ PredScoreBrackets->getScoreUB(EXP_CNT);
+ if (new_gds_ub > 0) {
+ ScoreBrackets->setEventUB(
+ GDS_GPR_LOCK,
+ std::max(ScoreBrackets->getEventUB(GDS_GPR_LOCK), new_gds_ub));
+ }
+ }
+ int pred_exp_ub = PredScoreBrackets->getEventUB(EXP_GPR_LOCK);
+ if (pred_exp_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
+ int new_exp_ub = MaxPending[EXP_CNT] + pred_exp_ub -
+ PredScoreBrackets->getScoreUB(EXP_CNT);
+ if (new_exp_ub > 0) {
+ ScoreBrackets->setEventUB(
+ EXP_GPR_LOCK,
+ std::max(ScoreBrackets->getEventUB(EXP_GPR_LOCK), new_exp_ub));
+ }
+ }
+ }
+}
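The trickiest part of the merge above is the rescaling: a predecessor's still-pending register scores are shifted so that its upper bound lines up with the merged bracket's MaxPending value (PredScale = MaxPending - PredUB), and anything at or below the predecessor's lower bound is dropped. A small self-contained sketch of just that arithmetic (not part of this patch; the concrete numbers and vector layout are made up for the example):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Predecessor bracket (LB, UB] = (2, 5]; merged bracket is [0, MaxPending].
  const int PredLB = 2, PredUB = 5;
  const int MaxPending = 4;
  const int PredScale = MaxPending - PredUB; // here -1

  std::vector<int> PredRegScore = {1, 3, 5}; // per-register scores (made up)
  std::vector<int> Merged(PredRegScore.size(), 0);

  for (size_t J = 0; J < PredRegScore.size(); ++J) {
    if (PredRegScore[J] <= PredLB)
      continue;                              // already waited on in the pred
    int NewScore = PredScale + PredRegScore[J];
    Merged[J] = std::max(Merged[J], NewScore);
  }

  for (size_t J = 0; J < Merged.size(); ++J)
    std::printf("reg %zu -> %d\n", J, Merged[J]); // prints 0, 2, 4
}
```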
+
+/// Return the "bottom" block of a loop. This differs from
+/// MachineLoop::getBottomBlock in that it works even if the loop is
+/// discontiguous.
+MachineBasicBlock *SIInsertWaitcnts::loopBottom(const MachineLoop *Loop) {
+ MachineBasicBlock *Bottom = Loop->getHeader();
+ for (MachineBasicBlock *MBB : Loop->blocks())
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ return Bottom;
+}
+
+// Generate s_waitcnt instructions where needed.
+void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
+ MachineBasicBlock &Block) {
+ // Initialize the state information.
+ mergeInputScoreBrackets(Block);
+
+ BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
+
+ DEBUG({
+ dbgs() << "Block" << Block.getNumber();
+ ScoreBrackets->dump();
+ });
+
+ bool InsertNOP = false;
+
+ // Walk over the instructions.
+ for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
+ Iter != E;) {
+ MachineInstr &Inst = *Iter;
+ // Remove any previously existing waitcnts.
+ if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
+ // TODO: Register the old waitcnt and optimize the following waitcnts.
+ // Leaving the previously existing waitcnts is conservatively correct.
+ if (CompilerGeneratedWaitcntSet.find(&Inst) ==
+ CompilerGeneratedWaitcntSet.end())
+ ++Iter;
+ else {
+ ScoreBrackets->setWaitcnt(&Inst);
+ ++Iter;
+ Inst.removeFromParent();
+ }
+ continue;
+ }
+
+ // Kill instructions generate a conditional branch to the endmain block.
+ // Merge the current waitcnt state into the endmain block information.
+ // TODO: Are there other flavors of KILL instruction?
+ if (Inst.getOpcode() == AMDGPU::KILL) {
+ addKillWaitBracket(ScoreBrackets);
+ }
+
+ bool VCCZBugWorkAround = false;
+ if (readsVCCZ(Inst) &&
+ (VCCZBugHandledSet.find(&Inst) == VCCZBugHandledSet.end())) {
+ if (ScoreBrackets->getScoreLB(LGKM_CNT) <
+ ScoreBrackets->getScoreUB(LGKM_CNT) &&
+ ScoreBrackets->hasPendingSMEM()) {
+ if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS)
+ VCCZBugWorkAround = true;
+ }
+ }
+
+ // Generate an s_waitcnt instruction to be placed before
+ // cur_Inst, if needed.
+ MachineInstr *SWaitInst = generateSWaitCntInstBefore(Inst, ScoreBrackets);
+
+ if (SWaitInst) {
+ Block.insert(Inst, SWaitInst);
+ if (ScoreBrackets->getWaitcnt() != SWaitInst) {
+ DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ << "Old Instr: " << Inst << '\n'
+ << "New Instr: " << *SWaitInst << '\n';);
+ }
+ }
+
+ updateEventWaitCntAfter(Inst, ScoreBrackets);
+
+#if 0 // TODO: implement resource type check controlled by options with ub = LB.
+ // If this instruction generates a S_SETVSKIP because it is an
+ // indexed resource, and we are on Tahiti, then it will also force
+ // an S_WAITCNT vmcnt(0)
+ if (RequireCheckResourceType(Inst, context)) {
+ // Force the score to as if an S_WAITCNT vmcnt(0) is emitted.
+ ScoreBrackets->setScoreLB(VM_CNT,
+ ScoreBrackets->getScoreUB(VM_CNT));
+ }
+#endif
+
+ ScoreBrackets->clearWaitcnt();
+
+ if (SWaitInst) {
+ DEBUG({ SWaitInst->print(dbgs() << '\n'); });
+ }
+ DEBUG({
+ Inst.print(dbgs());
+ ScoreBrackets->dump();
+ });
+
+ // Check to see if this is a GWS instruction. If so, and if this is CI or
+ // VI, then the generated code sequence will include an S_WAITCNT 0.
+ // TODO: Are these the only GWS instructions?
+ if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
+ // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
+ ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ ScoreBrackets->updateByWait(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
+ ScoreBrackets->updateByWait(LGKM_CNT,
+ ScoreBrackets->getScoreUB(LGKM_CNT));
+ }
+
+ // TODO: Remove this work-around after fixing the scheduler and enable the
+ // assert above.
+ if (VCCZBugWorkAround) {
+ // Restore the vccz bit. Any time a value is written to vcc, the vcc
+ // bit is updated, so we can restore the bit by reading the value of
+ // vcc and then writing it back to the register.
+ BuildMI(Block, Inst, Inst.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
+ AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+ VCCZBugHandledSet.insert(&Inst);
+ }
+
+ if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+
+      // This avoids an s_nop immediately after a waitcnt has been inserted.
+ if (!SWaitInst && InsertNOP) {
+ BuildMI(Block, Inst, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
+ }
+ InsertNOP = false;
+
+ // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
+ // or SMEM clause, respectively.
+ //
+ // The temporary workaround is to break the clauses with S_NOP.
+ //
+ // The proper solution would be to allocate registers such that all source
+ // and destination registers don't overlap, e.g. this is illegal:
+ // r0 = load r2
+ // r2 = load r0
+ bool IsSMEM = false;
+ bool IsVMEM = false;
+ if (TII->isSMRD(Inst))
+ IsSMEM = true;
+ else if (TII->usesVM_CNT(Inst))
+ IsVMEM = true;
+
+ ++Iter;
+ if (Iter == E)
+ break;
+
+ MachineInstr &Next = *Iter;
+
+ // TODO: How about consecutive SMEM instructions?
+      // The comment above says to break the clause, but the code does not.
+ // if ((TII->isSMRD(next) && isSMEM) ||
+ if (!IsSMEM && TII->usesVM_CNT(Next) && IsVMEM &&
+ // TODO: Enable this check when hasSoftClause is upstreamed.
+ // ST->hasSoftClauses() &&
+ ST->isXNACKEnabled()) {
+ // Insert a NOP to break the clause.
+ InsertNOP = true;
+ continue;
+ }
+
+ // There must be "S_NOP 0" between an instruction writing M0 and
+ // S_SENDMSG.
+ if ((Next.getOpcode() == AMDGPU::S_SENDMSG ||
+ Next.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+ Inst.definesRegister(AMDGPU::M0))
+ InsertNOP = true;
+
+ continue;
+ }
+
+ ++Iter;
+ }
+
+ // Check if we need to force convergence at loop footer.
+ MachineLoop *ContainingLoop = MLI->getLoopFor(&Block);
+ if (ContainingLoop && loopBottom(ContainingLoop) == &Block) {
+ LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
+ WaitcntData->print();
+ DEBUG(dbgs() << '\n';);
+
+ // The iterative waitcnt insertion algorithm aims for optimal waitcnt
+ // placement and doesn't always guarantee convergence for a loop. Each
+    // loop should take at most 2 iterations to converge naturally. When this
+    // maximum is reached and the result doesn't converge, we force
+    // convergence by inserting an s_waitcnt at the end of the loop footer.
+ if (WaitcntData->getIterCnt() > 2) {
+      // To ensure convergence, the wait events at the loop footer must be no
+      // more than those from the previous iteration. As a simplification,
+      // instead of tracking individual scores and generating the precise
+      // wait count, just wait on 0.
+ bool HasPending = false;
+ MachineInstr *SWaitInst = WaitcntData->getWaitcnt();
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ HasPending = true;
+ }
+ }
+
+ if (HasPending) {
+ if (!SWaitInst) {
+ SWaitInst = Block.getParent()->CreateMachineInstr(
+ TII->get(AMDGPU::S_WAITCNT), DebugLoc());
+ CompilerGeneratedWaitcntSet.insert(SWaitInst);
+ const MachineOperand &Op = MachineOperand::CreateImm(0);
+ SWaitInst->addOperand(MF, Op);
+#if 0 // TODO: Format the debug output
+ OutputTransformBanner("insertWaitcntInBlock",0,"Create:",context);
+ OutputTransformAdd(SWaitInst, context);
+#endif
+ }
+#if 0 // TODO: ??
+ _DEV( REPORTED_STATS->force_waitcnt_converge = 1; )
+#endif
+ }
+
+ if (SWaitInst) {
+ DEBUG({
+ SWaitInst->print(dbgs());
+ dbgs() << "\nAdjusted score board:";
+ ScoreBrackets->dump();
+ });
+
+        // Add this waitcnt to the block. It is either newly created or was
+        // created in a previous iteration and is added back, since block
+        // traversal always removes waitcnts.
+ insertWaitcntBeforeCF(Block, SWaitInst);
+ WaitcntData->setWaitcnt(SWaitInst);
+ }
+ }
+ }
+}
+
+bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<SISubtarget>();
+ TII = ST->getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ AMDGPUASI = ST->getAMDGPUAS();
+
+ HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
+ HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
+ HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
+
+ HardwareLimits.NumVGPRsMax = ST->getAddressableNumVGPRs();
+ HardwareLimits.NumSGPRsMax = ST->getAddressableNumSGPRs();
+ assert(HardwareLimits.NumVGPRsMax <= SQ_MAX_PGM_VGPRS);
+ assert(HardwareLimits.NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
+
+ RegisterEncoding.VGPR0 = TRI->getEncodingValue(AMDGPU::VGPR0);
+ RegisterEncoding.VGPRL =
+ RegisterEncoding.VGPR0 + HardwareLimits.NumVGPRsMax - 1;
+ RegisterEncoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0);
+ RegisterEncoding.SGPRL =
+ RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
+
+  // Walk over the blocks in reverse post order, inserting
+  // s_waitcnt where needed.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ bool Modified = false;
+ for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
+ I = RPOT.begin(),
+ E = RPOT.end(), J = RPOT.begin();
+ I != E;) {
+ MachineBasicBlock &MBB = **I;
+
+ BlockVisitedSet.insert(&MBB);
+
+ BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
+ if (!ScoreBrackets) {
+ BlockWaitcntBracketsMap[&MBB] = make_unique<BlockWaitcntBrackets>();
+ ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
+ }
+ ScoreBrackets->setPostOrder(MBB.getNumber());
+ MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB);
+ if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr)
+ LoopWaitcntDataMap[ContainingLoop] = make_unique<LoopWaitcntData>();
+
+ // If we are walking into the block from before the loop, then guarantee
+ // at least 1 re-walk over the loop to propagate the information, even if
+ // no S_WAITCNT instructions were generated.
+ if (ContainingLoop && ContainingLoop->getTopBlock() == &MBB && J < I &&
+ (BlockWaitcntProcessedSet.find(&MBB) ==
+ BlockWaitcntProcessedSet.end())) {
+ BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
+ DEBUG(dbgs() << "set-revisit: block"
+ << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+ }
+
+ // Walk over the instructions.
+ insertWaitcntInBlock(MF, MBB);
+
+ // Flag that waitcnts have been processed at least once.
+ BlockWaitcntProcessedSet.insert(&MBB);
+
+ // See if we want to revisit the loop.
+ if (ContainingLoop && loopBottom(ContainingLoop) == &MBB) {
+ MachineBasicBlock *EntryBB = ContainingLoop->getTopBlock();
+ BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get();
+ if (EntrySB && EntrySB->getRevisitLoop()) {
+ EntrySB->setRevisitLoop(false);
+ J = I;
+ int32_t PostOrder = EntrySB->getPostOrder();
+ // TODO: Avoid this loop. Find another way to set I.
+ for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
+ X = RPOT.begin(),
+ Y = RPOT.end();
+ X != Y; ++X) {
+ MachineBasicBlock &MBBX = **X;
+ if (MBBX.getNumber() == PostOrder) {
+ I = X;
+ break;
+ }
+ }
+ LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
+ WaitcntData->incIterCnt();
+ DEBUG(dbgs() << "revisit: block" << EntryBB->getNumber() << '\n';);
+ continue;
+ } else {
+ LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
+        // The loop converged, so reset the iteration count. If this loop gets
+        // revisited, it must be from an outer loop; the counter will restart,
+        // which ensures we don't force convergence on such revisits.
+ WaitcntData->resetIterCnt();
+ }
+ }
+
+ J = I;
+ ++I;
+ }
+
+ SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+
+ bool HaveScalarStores = false;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
+ ++I) {
+
+ if (!HaveScalarStores && TII->isScalarStore(*I))
+ HaveScalarStores = true;
+
+ if (I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+ EndPgmBlocks.push_back(&MBB);
+ }
+ }
+
+ if (HaveScalarStores) {
+ // If scalar writes are used, the cache must be flushed or else the next
+ // wave to reuse the same scratch memory can be clobbered.
+ //
+ // Insert s_dcache_wb at wave termination points if there were any scalar
+ // stores, and only if the cache hasn't already been flushed. This could be
+ // improved by looking across blocks for flushes in postdominating blocks
+ // from the stores but an explicitly requested flush is probably very rare.
+ for (MachineBasicBlock *MBB : EndPgmBlocks) {
+ bool SeenDCacheWB = false;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+
+ if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+ SeenDCacheWB = true;
+ else if (TII->isScalarStore(*I))
+ SeenDCacheWB = false;
+
+ // FIXME: It would be better to insert this before a waitcnt if any.
+ if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
+ !SeenDCacheWB) {
+ Modified = true;
+ BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ }
+ }
+ }
+ }
+
+ return Modified;
+}
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index fceabd7a8fdd..47257ce16ceb 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -21,16 +21,32 @@
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <utility>
#define DEBUG_TYPE "si-insert-waits"
using namespace llvm;
-using namespace llvm::AMDGPU;
namespace {
@@ -42,7 +58,6 @@ typedef union {
unsigned LGKM;
} Named;
unsigned Array[3];
-
} Counters;
typedef enum {
@@ -55,13 +70,12 @@ typedef Counters RegCounters[512];
typedef std::pair<unsigned, unsigned> RegInterval;
class SIInsertWaits : public MachineFunctionPass {
-
private:
- const SISubtarget *ST;
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
+ const SISubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI;
- IsaVersion IV;
+ AMDGPU::IsaInfo::IsaVersion ISA;
/// \brief Constant zero value
static const Counters ZeroCounts;
@@ -86,7 +100,7 @@ private:
RegCounters DefinedRegs;
/// \brief Different export instruction types seen since last wait.
- unsigned ExpInstrTypesSeen;
+ unsigned ExpInstrTypesSeen = 0;
/// \brief Type of the last opcode.
InstType LastOpcodeType;
@@ -100,7 +114,7 @@ private:
bool ReturnsVoid;
/// Whether the VCCZ bit is possibly corrupt
- bool VCCZCorrupt;
+ bool VCCZCorrupt = false;
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -141,13 +155,7 @@ private:
public:
static char ID;
- SIInsertWaits() :
- MachineFunctionPass(ID),
- ST(nullptr),
- TII(nullptr),
- TRI(nullptr),
- ExpInstrTypesSeen(0),
- VCCZCorrupt(false) { }
+ SIInsertWaits() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -161,7 +169,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
"SI Insert Waits", false, false)
@@ -294,7 +302,6 @@ RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Increment) {
-
// Get the hardware counter increments and sum them up
Counters Limit = ZeroCounts;
unsigned Sum = 0;
@@ -366,7 +373,6 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Required) {
-
// End of program? No need to wait on anything
// A function not returning void needs to wait, because other bytecode will
// be appended after it and we don't know what it will be.
@@ -393,7 +399,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
bool NeedWait = false;
for (unsigned i = 0; i < 3; ++i) {
-
if (Required.Array[i] <= WaitedOn.Array[i])
continue;
@@ -421,10 +426,10 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
// Build the wait instruction
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(encodeWaitcnt(IV,
- Counts.Named.VM,
- Counts.Named.EXP,
- Counts.Named.LGKM));
+ .addImm(AMDGPU::encodeWaitcnt(ISA,
+ Counts.Named.VM,
+ Counts.Named.EXP,
+ Counts.Named.LGKM));
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
@@ -434,7 +439,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
/// \brief helper function for handleOperands
static void increaseCounters(Counters &Dst, const Counters &Src) {
-
for (unsigned i = 0; i < 3; ++i)
Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
}
@@ -453,9 +457,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
unsigned Imm = I->getOperand(0).getImm();
Counters Counts, WaitOn;
- Counts.Named.VM = decodeVmcnt(IV, Imm);
- Counts.Named.EXP = decodeExpcnt(IV, Imm);
- Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
+ Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm);
+ Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm);
+ Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm);
for (unsigned i = 0; i < 3; ++i) {
if (Counts.Array[i] <= LastIssued.Array[i])
@@ -468,7 +472,6 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
}
Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-
Counters Result = ZeroCounts;
// For each register affected by this instruction increase the result
@@ -484,7 +487,6 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
-
if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
increaseCounters(Result, DefinedRegs[j]);
@@ -522,6 +524,16 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
}
}
+/// Return true if \p MBB has a single successor that immediately follows it
+/// in layout order, and \p MBB is that successor's only predecessor.
+static bool hasTrivialSuccessor(const MachineBasicBlock &MBB) {
+ if (MBB.succ_size() != 1)
+ return false;
+
+ const MachineBasicBlock *Succ = *MBB.succ_begin();
+ return (Succ->pred_size() == 1) && MBB.isLayoutSuccessor(Succ);
+}
+
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -531,12 +543,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- IV = getIsaVersion(ST->getFeatureBits());
+ ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- HardwareLimits.Named.VM = getVmcntBitMask(IV);
- HardwareLimits.Named.EXP = getExpcntBitMask(IV);
- HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
+ HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA);
+ HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA);
+ HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA);
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
@@ -636,12 +648,14 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
handleSendMsg(MBB, I);
if (I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN)
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
EndPgmBlocks.push_back(&MBB);
}
- // Wait for everything at the end of the MBB
- Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ // Wait for everything at the end of the MBB. If there is only one
+ // successor, we can defer the wait until the uses in that successor.
+ if (!hasTrivialSuccessor(MBB))
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
if (HaveScalarStores) {
@@ -665,7 +679,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// FIXME: It would be better to insert this before a waitcnt if any.
if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) && !SeenDCacheWB) {
Changes = true;
BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
}
@@ -676,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
+ if (!MFI->isEntryFunction()) {
+ // Wait for any outstanding memory operations that the input registers may
+ // depend on. We can't track them and it's better to do the wait after the
+ // costly call sequence.
+
+ // TODO: Could insert earlier and schedule more liberally with operations
+ // that only use caller preserved registers.
+ MachineBasicBlock &EntryBB = MF.front();
+ BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ Changes = true;
+ }
+
return Changes;
}
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
index 5523ec142ba7..b83a1fe187eb 100644
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string asm = "",
field bit VOP2 = 0;
field bit VOPC = 0;
field bit VOP3 = 0;
+ field bit VOP3P = 0;
field bit VINTRP = 0;
field bit SDWA = 0;
field bit DPP = 0;
@@ -78,6 +79,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// is unable to infer the encoding from the operands.
field bit VOPAsmPrefer32Bit = 0;
+ // This bit indicates that this has a floating point result type, so
+ // the clamp modifier has floating point semantics.
+ field bit FPClamp = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -92,6 +97,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{8} = VOP2;
let TSFlags{9} = VOPC;
let TSFlags{10} = VOP3;
+ let TSFlags{12} = VOP3P;
let TSFlags{13} = VINTRP;
let TSFlags{14} = SDWA;
@@ -120,6 +126,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{39} = ScalarStore;
let TSFlags{40} = FixedSize;
let TSFlags{41} = VOPAsmPrefer32Bit;
+ let TSFlags{42} = FPClamp;
let SchedRW = [Write32Bit];
@@ -131,19 +138,19 @@ class InstSI <dag outs, dag ins, string asm = "",
let AsmVariantName = AMDGPUAsmVariants.Default;
}
-class PseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
- : InstSI<outs, ins, "", pattern> {
+class PseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+ : InstSI<outs, ins, asm, pattern> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
-class SPseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
- : PseudoInstSI<outs, ins, pattern> {
+class SPseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+ : PseudoInstSI<outs, ins, pattern, asm> {
let SALU = 1;
}
-class VPseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
- : PseudoInstSI<outs, ins, pattern> {
+class VPseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+ : PseudoInstSI<outs, ins, pattern, asm> {
let VALU = 1;
let Uses = [EXEC];
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 26a8d22062a9..05ac67d26620 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -36,7 +37,7 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
cl::desc("Restrict range of branch instructions (DEBUG)"));
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
- : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
+ : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -315,7 +316,8 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
const MachineOperand *SecondDst = nullptr;
if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
- (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) {
+ (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
+ (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
} else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
@@ -346,6 +348,21 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
}
+static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) {
+ MachineFunction *MF = MBB.getParent();
+ DiagnosticInfoUnsupported IllegalCopy(*MF->getFunction(),
+ "illegal SGPR to VGPR copy",
+ DL, DS_Error);
+ LLVMContext &C = MF->getFunction()->getContext();
+ C.diagnose(IllegalCopy);
+
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
@@ -369,7 +386,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
+
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
@@ -391,7 +412,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+ if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
+
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
@@ -415,8 +440,14 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = AMDGPU::S_MOV_B32;
EltSize = 4;
}
+
+ if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
}
+
ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
@@ -870,9 +901,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstr *MovRel =
BuildMI(MBB, MI, DL, MovRelDesc)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(2))
.addReg(VecReg, RegState::ImplicitDefine)
- .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+ .addReg(VecReg,
+ RegState::Implicit | (IsUndef ? RegState::Undef : 0));
const int ImpDefIdx =
MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
@@ -897,14 +929,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// constant data.
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
.addReg(RegLo)
- .addOperand(MI.getOperand(1)));
+ .add(MI.getOperand(1)));
MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
.addReg(RegHi);
if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
MIB.addImm(0);
else
- MIB.addOperand(MI.getOperand(2));
+ MIB.add(MI.getOperand(2));
Bundler.append(MIB);
llvm::finalizeBundle(MBB, Bundler.begin());
@@ -1290,6 +1322,13 @@ unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
return Count;
}
+// Copy the flags onto the implicit condition register operand.
+static void preserveCondRegFlags(MachineOperand &CondReg,
+ const MachineOperand &OrigCond) {
+ CondReg.setIsUndef(OrigCond.isUndef());
+ CondReg.setIsKill(OrigCond.isKill());
+}
+
unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -1317,9 +1356,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
.addMBB(TBB);
// Copy the flags onto the implicit condition register operand.
- MachineOperand &CondReg = CondBr->getOperand(1);
- CondReg.setIsUndef(Cond[1].isUndef());
- CondReg.setIsKill(Cond[1].isKill());
+ preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
if (BytesAdded)
*BytesAdded = 4;
@@ -1351,6 +1388,157 @@ bool SIInstrInfo::reverseBranchCondition(
return false;
}
+bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
+ switch (Cond[0].getImm()) {
+ case VCCNZ:
+ case VCCZ: {
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
+ assert(MRI.getRegClass(FalseReg) == RC);
+
+ int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
+ CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
+
+ // Limit to equal cost for branch vs. N v_cndmask_b32s.
+ return !RI.isSGPRClass(RC) && NumInsts <= 6;
+ }
+ case SCC_TRUE:
+ case SCC_FALSE: {
+ // FIXME: We could insert for VGPRs if we could replace the original compare
+ // with a vector one.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
+ assert(MRI.getRegClass(FalseReg) == RC);
+
+ int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
+
+ // Multiples of 8 can do s_cselect_b64
+ if (NumInsts % 2 == 0)
+ NumInsts /= 2;
+
+ CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
+ return RI.isSGPRClass(RC);
+ }
+ default:
+ return false;
+ }
+}
+
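
For reference, the cost heuristic in canInsertSelect above counts one 32-bit select per dword of the value, and lets SALU selects collapse dword pairs into a single s_cselect_b64. A minimal standalone sketch of that computation (illustration only, not LLVM code; the function name is made up):

// Sketch only: mirrors the NumInsts computation in canInsertSelect above.
// BitWidth is assumed to be a multiple of 32.
static int estimateSelectInsts(int BitWidth, bool IsSGPR) {
  int NumInsts = BitWidth / 32;      // one v_cndmask_b32 per 32-bit piece
  if (IsSGPR && NumInsts % 2 == 0)
    NumInsts /= 2;                   // dword pairs use a single s_cselect_b64
  return NumInsts;
}
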
+void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
+ if (Pred == VCCZ || Pred == SCC_FALSE) {
+ Pred = static_cast<BranchPredicate>(-Pred);
+ std::swap(TrueReg, FalseReg);
+ }
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+ unsigned DstSize = DstRC->getSize();
+
+ if (DstSize == 4) {
+ unsigned SelOp = Pred == SCC_TRUE ?
+ AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
+
+ // Instruction's operands are backwards from what is expected.
+ MachineInstr *Select =
+ BuildMI(MBB, I, DL, get(SelOp), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg);
+
+ preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+ return;
+ }
+
+ if (DstSize == 8 && Pred == SCC_TRUE) {
+ MachineInstr *Select =
+ BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg);
+
+ preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+ return;
+ }
+
+ static const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ };
+
+ static const int16_t Sub0_15_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+ AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
+ };
+
+ unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
+ const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
+ const int16_t *SubIndices = Sub0_15;
+ int NElts = DstSize / 4;
+
+  // 64-bit select is only available for SALU.
+ if (Pred == SCC_TRUE) {
+ SelOp = AMDGPU::S_CSELECT_B64;
+ EltRC = &AMDGPU::SGPR_64RegClass;
+ SubIndices = Sub0_15_64;
+
+ assert(NElts % 2 == 0);
+ NElts /= 2;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(
+ MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
+
+ I = MIB->getIterator();
+
+ SmallVector<unsigned, 8> Regs;
+ for (int Idx = 0; Idx != NElts; ++Idx) {
+ unsigned DstElt = MRI.createVirtualRegister(EltRC);
+ Regs.push_back(DstElt);
+
+ unsigned SubIdx = SubIndices[Idx];
+
+ MachineInstr *Select =
+ BuildMI(MBB, I, DL, get(SelOp), DstElt)
+ .addReg(FalseReg, 0, SubIdx)
+ .addReg(TrueReg, 0, SubIdx);
+ preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+
+ MIB.addReg(DstElt)
+ .addImm(SubIdx);
+ }
+}
+
+bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO: {
+ // If there are additional implicit register operands, this may be used for
+ // register indexing so the source register operand isn't simply copied.
+ unsigned NumOps = MI.getDesc().getNumOperands() +
+ MI.getDesc().getNumImplicitUses();
+
+ return MI.getNumOperands() == NumOps;
+ }
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::COPY:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void removeModOperands(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
@@ -1400,15 +1588,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
- bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
-
- // Don't fold if we are using source modifiers. The new VOP2 instructions
- // don't have them.
- if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) ||
- hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
+ // Don't fold if we are using source or output modifiers. The new VOP2
+ // instructions don't have them.
+ if (hasAnyModifiersSet(UseMI))
return false;
- }
const MachineOperand &ImmOp = DefMI.getOperand(1);
@@ -1421,6 +1604,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (isInlineConstant(UseMI, *Src0, ImmOp))
return false;
+ bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -1633,20 +1817,26 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src0Mods =
+ getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src1Mods =
+ getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+ const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
+ const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
return BuildMI(*MBB, MI, MI.getDebugLoc(),
get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
- .addOperand(*Dst)
- .addImm(0) // Src0 mods
- .addOperand(*Src0)
- .addImm(0) // Src1 mods
- .addOperand(*Src1)
+ .add(*Dst)
+ .addImm(Src0Mods ? Src0Mods->getImm() : 0)
+ .add(*Src0)
+ .addImm(Src1Mods ? Src1Mods->getImm() : 0)
+ .add(*Src1)
       .addImm(0) // Src2 mods
- .addOperand(*Src2)
- .addImm(0) // clamp
- .addImm(0); // omod
+ .add(*Src2)
+ .addImm(Clamp ? Clamp->getImm() : 0)
+ .addImm(Omod ? Omod->getImm() : 0);
}
// It's not generally safe to move VALU instructions across these since it will
@@ -1687,7 +1877,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
case 16:
- return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
@@ -1705,24 +1896,43 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// would be for any 32-bit integer operand, but would not be for a 64-bit one.
int64_t Imm = MO.getImm();
- switch (operandBitWidth(OperandType)) {
- case 32: {
+ switch (OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
int32_t Trunc = static_cast<int32_t>(Imm);
return Trunc == Imm &&
AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
- case 64: {
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
return AMDGPU::isInlinableLiteral64(MO.getImm(),
ST.hasInv2PiInlineImm());
}
- case 16: {
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
+ // A few special case instructions have 16-bit operands on subtargets
+ // where 16-bit instructions are not legal.
+      // TODO: Do the 32-bit immediates work? We shouldn't really need to
+      // handle constants in these cases.
int16_t Trunc = static_cast<int16_t>(Imm);
- return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
}
return false;
}
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ uint32_t Trunc = static_cast<uint32_t>(Imm);
+ return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
+ }
default:
llvm_unreachable("invalid bitwidth");
}
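
As context for the checks above, the integer part of the inline-constant rule is small: a literal encodes inline only if it falls in a narrow signed range, while the FP inline constants (0.5, 1.0, 2.0, and optionally 1/(2*pi)) are handled inside the AMDGPU::isInlinableLiteral* helpers. A hedged standalone sketch of just the integer range check (not the LLVM implementation; the function name is made up):

// Sketch only: integer inline constants on this target are the values
// -16..64 inclusive; anything else needs a 32-bit literal after the
// instruction.
#include <cstdint>

static bool isInlinableIntLiteral(int64_t Imm) {
  return Imm >= -16 && Imm <= 64;
}
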
@@ -1801,6 +2011,14 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
return Mods && Mods->getImm();
}
+bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
+ return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
+ hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+ hasModifiersSet(MI, AMDGPU::OpName::omod);
+}
+
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
const MCOperandInfo &OpInfo) const {
@@ -2238,7 +2456,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
unsigned Reg = MRI.createVirtualRegister(VRC);
DebugLoc DL = MBB->findDebugLoc(I);
- BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).addOperand(MO);
+ BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
MO.ChangeToRegister(Reg, false);
}
@@ -2564,8 +2782,8 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
unsigned DstReg = MRI.createVirtualRegister(DstRC);
- MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg)
- .addOperand(Op);
+ MachineInstr *Copy =
+ BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Op.setReg(DstReg);
Op.setSubReg(0);
@@ -2810,13 +3028,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
// Regular buffer load / store.
MachineInstrBuilder MIB =
BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
+ .add(*VData)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset);
+ .add(*SRsrc)
+ .add(*SOffset)
+ .add(*Offset);
// Atomics do not have this operand.
if (const MachineOperand *GLC =
@@ -2836,14 +3054,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
} else {
// Atomics with return.
Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
- .addOperand(*VDataIn)
+ .add(*VData)
+ .add(*VDataIn)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset)
+ .add(*SRsrc)
+ .add(*SOffset)
+ .add(*Offset)
.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
@@ -2970,6 +3188,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
+
+ case AMDGPU::S_PACK_LL_B32_B16:
+ case AMDGPU::S_PACK_LH_B32_B16:
+ case AMDGPU::S_PACK_HH_B32_B16: {
+ movePackToVALU(Worklist, MRI, Inst);
+ Inst.eraseFromParent();
+ continue;
+ }
}
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
@@ -3027,12 +3253,15 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
unsigned NewDstReg = AMDGPU::NoRegister;
if (HasDst) {
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ continue;
+
// Update the destination register class.
const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
if (!NewDstRC)
continue;
- unsigned DstReg = Inst.getOperand(0).getReg();
if (Inst.isCopy() &&
TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
@@ -3112,15 +3341,13 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub0)
- .addOperand(SrcReg0Sub0);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub1)
- .addOperand(SrcReg0Sub1);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
@@ -3174,8 +3401,8 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
- .addOperand(SrcReg0Sub0)
- .addOperand(SrcReg1Sub0);
+ .add(SrcReg0Sub0)
+ .add(SrcReg1Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
@@ -3184,8 +3411,8 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
- .addOperand(SrcReg0Sub1)
- .addOperand(SrcReg1Sub1);
+ .add(SrcReg0Sub1)
+ .add(SrcReg1Sub1);
unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
@@ -3231,13 +3458,9 @@ void SIInstrInfo::splitScalar64BitBCNT(
MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
AMDGPU::sub1, SrcSubRC);
- BuildMI(MBB, MII, DL, InstDesc, MidReg)
- .addOperand(SrcRegSub0)
- .addImm(0);
+ BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
- BuildMI(MBB, MII, DL, InstDesc, ResultReg)
- .addOperand(SrcRegSub1)
- .addReg(MidReg);
+ BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
@@ -3326,6 +3549,68 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
}
}
+void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineRegisterInfo &MRI,
+ MachineInstr &Inst) const {
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineBasicBlock *MBB = Inst.getParent();
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ switch (Inst.getOpcode()) {
+ case AMDGPU::S_PACK_LL_B32_B16: {
+ unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
+ // 0.
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+ .addImm(0xffff);
+
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
+ .addReg(ImmReg, RegState::Kill)
+ .add(Src0);
+
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
+ .add(Src1)
+ .addImm(16)
+ .addReg(TmpReg, RegState::Kill);
+ break;
+ }
+ case AMDGPU::S_PACK_LH_B32_B16: {
+ unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+ .addImm(0xffff);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
+ .addReg(ImmReg, RegState::Kill)
+ .add(Src0)
+ .add(Src1);
+ break;
+ }
+ case AMDGPU::S_PACK_HH_B32_B16: {
+ unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
+ .addImm(16)
+ .add(Src0);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+      .addImm(0xffff0000);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
+ .add(Src1)
+ .addReg(ImmReg, RegState::Kill)
+ .addReg(TmpReg, RegState::Kill);
+ break;
+ }
+ default:
+ llvm_unreachable("unhandled s_pack_* instruction");
+ }
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+}
+
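
The expansions above reproduce, with VALU ops, the bit-level behavior of the three scalar pack instructions. A standalone sketch of that behavior for reference (illustration only, not LLVM code):

// Sketch only: what s_pack_{ll,lh,hh}_b32_b16 compute, and what the
// v_and/v_lshl_or, v_bfi, and v_lshrrev/v_and_or sequences above rebuild.
#include <cstdint>

static uint32_t pack_ll(uint32_t s0, uint32_t s1) { // {s1[15:0],  s0[15:0]}
  return (s0 & 0xffffu) | (s1 << 16);
}
static uint32_t pack_lh(uint32_t s0, uint32_t s1) { // {s1[31:16], s0[15:0]}
  return (s0 & 0xffffu) | (s1 & 0xffff0000u);
}
static uint32_t pack_hh(uint32_t s0, uint32_t s1) { // {s1[31:16], s0[31:16]}
  return (s0 >> 16) | (s1 & 0xffff0000u);
}
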
void SIInstrInfo::addSCCDefUsersToVALUWorklist(
MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const {
// This assumes that all the users of SCC are in the same block
@@ -3448,10 +3733,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
if (ST.isAmdHsaOS()) {
- RsrcDataFormat |= (1ULL << 56);
+ // Set ATC = 1. GFX9 doesn't have this bit.
+ if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
+ RsrcDataFormat |= (1ULL << 56);
- if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
- // Set MTYPE = 2
+ // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
+  // Note that this disables the TC L2 cache and therefore decreases performance.
+ if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
RsrcDataFormat |= (2ULL << 59);
}
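
The two fields touched above live in the high words of the buffer resource descriptor. A hedged sketch of the conditions and bit placement on the targets that still have them (illustration only, not LLVM code; parameter names are made up):

// Sketch only: mirrors the logic above. ATC is set at bit 56 on pre-GFX9
// HSA targets, MTYPE = 2 is placed at bit 59 only on Volcanic Islands;
// GFX9 dropped both fields.
#include <cstdint>

static uint64_t addHsaRsrcBits(uint64_t Rsrc, bool IsGfx9OrLater, bool IsVI) {
  if (!IsGfx9OrLater)
    Rsrc |= (1ULL << 56);   // ATC = 1
  if (IsVI)
    Rsrc |= (2ULL << 59);   // MTYPE = 2 (MTYPE_UC, uncached)
  return Rsrc;
}
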
@@ -3463,11 +3751,14 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size;
- uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
+ // GFX9 doesn't have ELEMENT_SIZE.
+ if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
+ uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
+ Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
+ }
- Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
- // IndexStride = 64
- (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
+ // IndexStride = 64.
+ Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
@@ -3496,7 +3787,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+ (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -3552,16 +3843,11 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (DescSize != 0 && DescSize != 4)
return DescSize;
- if (Opc == AMDGPU::WAVE_BARRIER)
- return 0;
-
// 4-byte instructions may have a 32-bit literal encoded after them. Check
  // operands that could ever be literals.
if (isVALU(MI) || isSALU(MI)) {
- if (isFixedSize(MI)) {
- assert(DescSize == 4);
+ if (isFixedSize(MI))
return DescSize;
- }
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
@@ -3584,7 +3870,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return 4;
switch (Opc) {
- case AMDGPU::SI_MASK_BRANCH:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
@@ -3609,7 +3894,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
+ if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
return true;
}
return false;
@@ -3640,3 +3925,21 @@ ScheduleHazardRecognizer *
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
return new GCNHazardRecognizer(MF);
}
+
+bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
+ return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
+ MI.modifiesRegister(AMDGPU::EXEC, &RI);
+}
+
+MachineInstrBuilder
+SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ unsigned DestReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+ .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+}
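
The header documentation below notes that getAddNoCarry returns a partially built add and the caller supplies the source operands. A hedged usage sketch (caller code assumed for illustration, not taken from this patch; BaseReg and Offset are made-up names):

// Sketch only: the builder already carries the vdst and the dead carry def,
// so a hypothetical caller appends just the two sources.
TII->getAddNoCarry(MBB, I, DL, DestReg)
    .addImm(Offset)    // src0
    .addReg(BaseReg);  // src1
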
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index e68f6f92ba96..659473ca6a47 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -69,6 +69,9 @@ private:
MachineInstr &Inst) const;
void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr &Inst) const;
+ void movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineRegisterInfo &MRI,
+ MachineInstr &Inst) const;
void addUsersToMoveToVALUWorklist(
unsigned Reg, MachineRegisterInfo &MRI,
@@ -203,10 +206,24 @@ public:
bool reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const override;
+
+ void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg) const override;
+
bool
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
+ bool isFoldableCopy(const MachineInstr &MI) const;
+
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const final;
@@ -308,6 +325,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}
+ static bool isSDWA(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
+ }
+
+ bool isSDWA(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SDWA;
+ }
+
static bool isVOPC(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
}
@@ -420,6 +445,22 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DPP;
}
+ static bool isVOP3P(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
+ }
+
+ bool isVOP3P(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
+ }
+
+ static bool isVINTRP(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
+ }
+
+ bool isVINTRP(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
+ }
+
static bool isScalarUnit(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
}
@@ -454,6 +495,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
}
+ static bool hasFPClamp(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::HasFPClamp;
+ }
+
+ bool hasFPClamp(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::HasFPClamp;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -462,28 +511,6 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
- static int operandBitWidth(uint8_t OperandType) {
- switch (OperandType) {
- case AMDGPU::OPERAND_REG_IMM_INT32:
- case AMDGPU::OPERAND_REG_IMM_FP32:
- case AMDGPU::OPERAND_REG_INLINE_C_INT32:
- case AMDGPU::OPERAND_REG_INLINE_C_FP32:
- return 32;
- case AMDGPU::OPERAND_REG_IMM_INT64:
- case AMDGPU::OPERAND_REG_IMM_FP64:
- case AMDGPU::OPERAND_REG_INLINE_C_INT64:
- case AMDGPU::OPERAND_REG_INLINE_C_FP64:
- return 64;
- case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
- case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
- return 16;
- default:
- llvm_unreachable("unexpected operand type");
- }
- }
-
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
@@ -571,6 +598,7 @@ public:
bool hasModifiersSet(const MachineInstr &MI,
unsigned OpName) const;
+ bool hasAnyModifiersSet(const MachineInstr &MI) const;
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
@@ -731,6 +759,17 @@ public:
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
+
+ bool isBasicBlockPrologue(const MachineInstr &MI) const override;
+
+ /// \brief Return a partially built integer add instruction without carry.
+ /// Caller must add source operands.
+  /// For pre-GFX9 it will generate an unused carry destination operand.
+  /// TODO: On GFX9 and later it should return a no-carry operation.
+ MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ unsigned DestReg) const;
};
namespace AMDGPU {
@@ -741,6 +780,9 @@ namespace AMDGPU {
int getVOPe32(uint16_t Opcode);
LLVM_READONLY
+ int getSDWAOp(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteRev(uint16_t Opcode);
LLVM_READONLY
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index ebaefae3bfef..c6daf743f3ac 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -71,11 +71,6 @@ def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
-def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
- SDTCisVT<3, i32>]>
->;
-
class SDSample<string opcode> : SDNode <opcode,
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
@@ -107,7 +102,7 @@ def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
>;
def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
- return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
@@ -144,7 +139,7 @@ def SIst_local : SDNode <"ISD::STORE", SDTStore,
def si_st_local : PatFrag <
(ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_store_local : PatFrag <
@@ -196,6 +191,21 @@ def si_uniform_br_scc : PatFrag <
return isCBranchSCC(N);
}]>;
+def lshr_rev : PatFrag <
+ (ops node:$src1, node:$src0),
+ (srl $src0, $src1)
+>;
+
+def ashr_rev : PatFrag <
+ (ops node:$src1, node:$src0),
+ (sra $src0, $src1)
+>;
+
+def lshl_rev : PatFrag <
+ (ops node:$src1, node:$src0),
+ (shl $src0, $src1)
+>;
+
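
These *_rev fragments exist because the hardware "rev" shift encodings take the shift amount as the first source. A standalone sketch of the operand order they paper over (illustration only, not LLVM code):

// Sketch only: v_lshrrev_b32-style operand order. The DAG node
// (srl value, amount) matches the fragment with its operands reversed.
#include <cstdint>

static uint32_t lshrrev(uint32_t amount, uint32_t value) {
  return value >> amount;
}
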
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0> {
def _glue : SDNode <
@@ -266,10 +276,6 @@ def SIMM16bit : PatLeaf <(imm),
[{return isInt<16>(N->getSExtValue());}]
>;
-def IMM20bit : PatLeaf <(imm),
- [{return isUInt<20>(N->getZExtValue());}]
->;
-
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
@@ -299,6 +305,19 @@ class VGPRImm <dag frag> : PatLeaf<frag, [{
return Limit < 10;
}]>;
+def NegateImm : SDNodeXForm<imm, [{
+ return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+// TODO: What about when FP inline imm values work?
+def NegSubInlineConst32 : ImmLeaf<i32, [{
+ return Imm < -16 && Imm >= -64;
+}], NegateImm>;
+
+def NegSubInlineConst16 : ImmLeaf<i16, [{
+ return Imm < -16 && Imm >= -64;
+}], NegateImm>;
+
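
The NegSubInlineConst leaves cover constants whose negation, unlike the constant itself, fits the inline-constant range, so an add of such a constant can instead be selected as a subtract. A standalone sketch of the range reasoning (illustration only, not LLVM code; the function names are made up):

// Sketch only: -64..-17 are not inline constants, but their negations
// 17..64 are, so "x + C" can instead select a sub of -C.
#include <cstdint>

static bool isInlineIntConst(int64_t C) { return C >= -16 && C <= 64; }
static bool foldsAsNegatedSub(int64_t C) {
  return !isInlineIntConst(C) && isInlineIntConst(-C);
}
// foldsAsNegatedSub(-20) == true; foldsAsNegatedSub(-70) == false.
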
//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
@@ -449,6 +468,12 @@ class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
let ParserMatchClass = MatchClass;
}
+class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
+ OperandWithDefaultOps<i32, (ops (i32 0))> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@@ -486,6 +511,11 @@ def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
+def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
+def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
+def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
+def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
@@ -525,6 +555,7 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let ParserMethod = "parseRegOrImmWithFPInputMods";
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
+
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -577,6 +608,33 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
+class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "PackedFP"#opSize#"InputMods";
+ let ParserMethod = "parseRegOrImm";
+ let PredicateMethod = "isRegOrImm";
+// let PredicateMethod = "isPackedFP"#opSize#"InputMods";
+}
+
+class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "PackedInt"#opSize#"InputMods";
+ let ParserMethod = "parseRegOrImm";
+ let PredicateMethod = "isRegOrImm";
+// let PredicateMethod = "isPackedInt"#opSize#"InputMods";
+}
+
+def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
+def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
+
+class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
+// let PrintMethod = "printPackedFPInputMods";
+}
+
+class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
+ //let PrintMethod = "printPackedIntInputMods";
+}
+
+def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
+def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
//===----------------------------------------------------------------------===//
// Complex patterns
@@ -593,6 +651,14 @@ def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
+// VOP3Mods, but the input source is known to never be NaN.
+def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
+
+def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
+
+def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
+def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
+
//===----------------------------------------------------------------------===//
// SI assembler operands
@@ -604,19 +670,32 @@ def SIOperand {
int FLAT_SCR = 0x68;
}
+// This should be kept in sync with SISrcMods enum
def SRCMODS {
int NONE = 0;
int NEG = 1;
+ int ABS = 2;
+ int NEG_ABS = 3;
+
+ int NEG_HI = ABS;
+ int OP_SEL_0 = 4;
+ int OP_SEL_1 = 8;
}
def DSTCLAMP {
int NONE = 0;
+ int ENABLE = 1;
}
def DSTOMOD {
int NONE = 0;
}
+def TRAPID {
+ int LLVM_TRAP = 2;
+ int LLVM_DEBUG_TRAP = 3;
+}
+
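
The SRCMODS values above combine as a bit mask on each source operand, while DSTCLAMP and DSTOMOD select the destination clamp and output-modifier fields. A small sketch mirroring those values (illustration only, not LLVM code):

// Sketch only: the source-modifier field is a bit mask, so a negated
// absolute-value source is NEG | ABS == NEG_ABS == 3; for VOP3P, NEG_HI
// reuses the ABS bit and OP_SEL_0/OP_SEL_1 pick 16-bit halves of the source.
enum SrcMods { NONE = 0, NEG = 1, ABS = 2, NEG_ABS = NEG | ABS,
               OP_SEL_0 = 4, OP_SEL_1 = 8 };
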
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
@@ -648,8 +727,9 @@ class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
exp_vm:$vm, exp_compr:$compr, i8imm:$en),
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
- [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr),
- f32:$src0, f32:$src1, f32:$src2, f32:$src3)]> {
+ [(node (i8 timm:$tgt), (i8 timm:$en),
+ f32:$src0, f32:$src1, f32:$src2, f32:$src3,
+ (i1 timm:$compr), (i1 timm:$vm))]> {
let AsmMatchConverter = "cvtExp";
}
@@ -666,6 +746,7 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _si : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
EXPe {
+ let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
}
@@ -673,6 +754,7 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _vi : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
EXPe_vi {
+ let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
}
@@ -706,12 +788,34 @@ class getVALUDstForVT<ValueType VT> {
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
- 0)));
- RegisterOperand ret = !if(isFP,
- !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Size, 16), VSrc_f16, VSrc_f32)),
- !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Size, 16), VSrc_b16, VSrc_b32)));
+ 0))));
+
+ RegisterOperand ret =
+ !if(isFP,
+ !if(!eq(VT.Size, 64),
+ VSrc_f64,
+ !if(!eq(VT.Value, f16.Value),
+ VSrc_f16,
+ !if(!eq(VT.Value, v2f16.Value),
+ VCSrc_v2f16,
+ VSrc_f32
+ )
+ )
+ ),
+ !if(!eq(VT.Size, 64),
+ VSrc_b64,
+ !if(!eq(VT.Value, i16.Value),
+ VSrc_b16,
+ !if(!eq(VT.Value, v2i16.Value),
+ VCSrc_v2b16,
+ VSrc_b32
+ )
+ )
+ )
+ );
}
// Returns the vreg register class to use for source operand given VT
@@ -725,25 +829,38 @@ class getVregSrcForVT<ValueType VT> {
// given VT.
class getVOP3SrcForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
- 0)));
+ 0))));
RegisterOperand ret =
!if(!eq(VT.Size, 128),
- VSrc_128,
- !if(!eq(VT.Size, 64),
+ VSrc_128,
+ !if(!eq(VT.Size, 64),
!if(isFP,
- VCSrc_f64,
- VCSrc_b64),
+ VCSrc_f64,
+ VCSrc_b64),
!if(!eq(VT.Value, i1.Value),
- SCSrc_b64,
- !if(isFP,
- !if(!eq(VT.Size, 16), VCSrc_f16, VCSrc_f32),
- !if(!eq(VT.Size, 16), VCSrc_b16, VCSrc_b32)
- )
- )
- )
- );
+ SCSrc_b64,
+ !if(isFP,
+ !if(!eq(VT.Value, f16.Value),
+ VCSrc_f16,
+ !if(!eq(VT.Value, v2f16.Value),
+ VCSrc_v2f16,
+ VCSrc_f32
+ )
+ ),
+ !if(!eq(VT.Value, i16.Value),
+ VCSrc_b16,
+ !if(!eq(VT.Value, v2i16.Value),
+ VCSrc_v2b16,
+ VCSrc_b32
+ )
+ )
+ )
+ )
+ )
+ );
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
@@ -753,7 +870,8 @@ class isFloatType<ValueType SrcVT> {
!if(!eq(SrcVT.Value, f16.Value), 1,
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
- 0)));
+ !if(!eq(SrcVT.Value, v2f16.Value), 1,
+ 0))));
}
class isIntType<ValueType SrcVT> {
@@ -764,6 +882,23 @@ class isIntType<ValueType SrcVT> {
0)));
}
+class isPackedType<ValueType SrcVT> {
+ bit ret =
+ !if(!eq(SrcVT.Value, v2i16.Value), 1,
+ !if(!eq(SrcVT.Value, v2f16.Value), 1, 0)
+ );
+}
+
+// Float or packed int
+class isModifierType<ValueType SrcVT> {
+ bit ret =
+ !if(!eq(SrcVT.Value, f16.Value), 1,
+ !if(!eq(SrcVT.Value, f32.Value), 1,
+ !if(!eq(SrcVT.Value, f64.Value), 1,
+ !if(!eq(SrcVT.Value, v2f16.Value), 1,
+ !if(!eq(SrcVT.Value, v2i16.Value), 1,
+ 0)))));
+}
// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT> {
@@ -771,6 +906,7 @@ class getSrcMod <ValueType VT> {
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
+ bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
!if(isFP,
@@ -801,8 +937,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasModifiers, Operand Src0Mod, Operand Src1Mod,
- Operand Src2Mod> {
+ bit HasModifiers, bit HasOMod,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
!if (!eq(NumSrcArgs, 0),
@@ -821,9 +957,13 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (!eq(NumSrcArgs, 2),
!if (!eq(HasModifiers, 1),
// VOP 2 with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, omod:$omod)
+ !if( !eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, omod:$omod),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp))
/* else */,
// VOP2 without modifiers
(ins Src0RC:$src0, Src1RC:$src1)
@@ -831,16 +971,57 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
// VOP3 with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp, omod:$omod)
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp, omod:$omod),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp))
/* else */,
// VOP3 without modifiers
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
/* endif */ ))));
}
+/// XXX - src1 may only allow VGPRs?
+
+// The modifiers (except clamp) are dummy operands for the benefit of
+// printing and parsing; what is actually printed is determined by the
+// srcN_modifiers operands.
+class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs,
+ bit HasClamp,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !if (!eq(NumSrcArgs, 2),
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi)),
+ // else NumSrcArgs == 3
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi))
+ );
+}
+
class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
@@ -924,7 +1105,8 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
-class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers,
+ bit HasOMod, ValueType DstVT = i32> {
string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -934,7 +1116,26 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<HasDst, NumSrcArgs, DstVT>.ret,
- dst#", "#src0#src1#src2#"$clamp"#"$omod");
+ dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP3P
+// instruction.
+class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
+ bit HasClamp, ValueType DstVT = i32> {
+ string dst = " $vdst";
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1",
+ " $src1,"));
+ string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+ string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
+ string clamp = !if(HasClamp, "$clamp", "");
+
+ // Each modifier is printed as an array of bits for each operand, so
+ // all operands are printed as part of src0_modifiers.
+ string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
}
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@@ -1035,7 +1236,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
field Operand Src0ModSDWA = getSrcModExt<Src0VT>.ret;
field Operand Src1ModSDWA = getSrcModExt<Src1VT>.ret;
-
+
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
field bit HasDst32 = HasDst;
@@ -1046,7 +1247,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
// TODO: Modifiers logic is somewhat adhoc here, to be refined later
- field bit HasModifiers = isFloatType<Src0VT>.ret;
+ field bit HasModifiers = isModifierType<Src0VT>.ret;
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
@@ -1060,12 +1261,20 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
- field bit HasOMod = HasModifiers;
field bit HasClamp = HasModifiers;
field bit HasSDWAClamp = HasSrc0;
+ field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
+
+ field bit IsPacked = isPackedType<Src0VT>.ret;
+ field bit HasOpSel = IsPacked;
+ field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
+ field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
+ field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
+
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
// VOP3b instructions are a special case with a second explicit
@@ -1077,7 +1286,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ HasModifiers, HasOMod, Src0Mod, Src1Mod,
+ Src2Mod>.ret;
+ field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
+ NumSrcArgs, HasClamp,
+ Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
+
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
@@ -1085,7 +1299,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
DstVT>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
- field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
+ field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
}
@@ -1101,11 +1316,18 @@ def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
-def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
-def VOP_I16_I16_I16_I16 : VOPProfile <[i32, i32, i32, i32, untyped]>;
+def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
+def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
+def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
+def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
+
+def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
+def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
@@ -1117,6 +1339,8 @@ def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
+def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
@@ -1126,6 +1350,7 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
@@ -1213,6 +1438,15 @@ def getVOPe32 : InstrMapping {
let ValueCols = [["4", "0"]];
}
+// Maps ordinary instructions to their SDWA counterparts
+def getSDWAOp : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["Default"];
+ let ValueCols = [["SDWA"]];
+}
+
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 38e31e75ee67..2f89503e129a 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -111,6 +111,12 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
+def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
+ let hasSideEffects = 1;
+ let SALU = 1;
+ let usesCustomInserter = 1;
+}
+
let usesCustomInserter = 1, SALU = 1 in {
def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
@@ -146,6 +152,8 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
let mayStore = 1;
let isBarrier = 1;
let isConvergent = 1;
+ let FixedSize = 1;
+ let Size = 0;
}
// SI pseudo instructions. These are used by the CFG structurizer pass
@@ -153,48 +161,44 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
// Dummy terminator instruction to use after control flow instructions
// replaced with exec mask operations.
-def SI_MASK_BRANCH : PseudoInstSI <
+def SI_MASK_BRANCH : VPseudoInstSI <
(outs), (ins brtarget:$target)> {
let isBranch = 0;
let isTerminator = 1;
let isBarrier = 0;
- let Uses = [EXEC];
let SchedRW = [];
let hasNoSchedulingInfo = 1;
+ let FixedSize = 1;
+ let Size = 0;
}
let isTerminator = 1 in {
def SI_IF: CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
- [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {
+ [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
let Constraints = "";
let Size = 12;
- let mayLoad = 1;
- let mayStore = 1;
let hasSideEffects = 1;
}
def SI_ELSE : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
let Constraints = "$src = $dst";
let Size = 12;
- let mayStore = 1;
- let mayLoad = 1;
let hasSideEffects = 1;
}
def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
- [(int_amdgcn_loop i64:$saved, bb:$target)], 1, 1> {
+ [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
let Size = 8;
- let isBranch = 1;
+ let isBranch = 0;
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
}
-} // End isBranch = 1, isTerminator = 1
+} // End isTerminator = 1
def SI_END_CF : CFPseudoInstSI <
(outs), (ins SReg_64:$saved),
@@ -202,9 +206,9 @@ def SI_END_CF : CFPseudoInstSI <
let Size = 4;
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
- let mayLoad = 1;
- let mayStore = 1;
let hasSideEffects = 1;
+ let mayLoad = 1; // FIXME: Should not need memory flags
+ let mayStore = 1;
}
def SI_BREAK : CFPseudoInstSI <
@@ -244,6 +248,10 @@ def SI_KILL_TERMINATOR : SPseudoInstSI <
let isTerminator = 1;
}
+def SI_ILLEGAL_COPY : SPseudoInstSI <
+ (outs unknown:$dst), (ins unknown:$src),
+ [], " ; illegal copy $src to $dst">;
+
} // End Uses = [EXEC], Defs = [EXEC,VCC]
// Branch on undef scc. Used to avoid intermediate copy from
@@ -259,6 +267,14 @@ def SI_PS_LIVE : PseudoInstSI <
let SALU = 1;
}
+def SI_MASKED_UNREACHABLE : SPseudoInstSI <(outs), (ins),
+ [(int_amdgcn_unreachable)],
+ "; divergent unreachable"> {
+ let Size = 0;
+ let hasNoSchedulingInfo = 1;
+ let FixedSize = 1;
+}
+
// Used as an isel pseudo to directly emit initialization with an
// s_mov_b32 rather than a copy of another initialized
// register. MachineCSE skips copies, and we don't want to have to
@@ -270,12 +286,12 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
let isReMaterializable = 1;
}
-def SI_RETURN : SPseudoInstSI <
- (outs), (ins variable_ops), [(AMDGPUreturn)]> {
+// Return instruction used by shaders that return to a shader variant epilog.
+def SI_RETURN_TO_EPILOG : SPseudoInstSI <
+ (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
let isTerminator = 1;
let isBarrier = 1;
let isReturn = 1;
- let hasSideEffects = 1;
let hasNoSchedulingInfo = 1;
let DisableWQM = 1;
}
@@ -383,9 +399,18 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
} // End SubtargetPredicate = isGCN
let Predicates = [isGCN] in {
+def : Pat<
+ (trap),
+ (S_TRAP_PSEUDO TRAPID.LLVM_TRAP)
+>;
def : Pat<
- (int_amdgcn_else i64:$src, bb:$target),
+ (debugtrap),
+ (S_TRAP_PSEUDO TRAPID.LLVM_DEBUG_TRAP)
+>;
+
+def : Pat<
+ (AMDGPUelse i64:$src, bb:$target),
(SI_ELSE $src, $target, 0)
>;
@@ -423,24 +448,37 @@ def : Pat <
} // End Predicates = [UnsafeFPMath]
+
+// f16_to_fp patterns
def : Pat <
- (f32 (fpextend f16:$src)),
- (V_CVT_F32_F16_e32 $src)
+ (f32 (f16_to_fp i32:$src0)),
+ (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
- (f64 (fpextend f16:$src)),
- (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
+ (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))),
+ (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : Pat <
+ (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))),
+ (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : Pat <
+ (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))),
+ (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
- (f16 (fpround f32:$src)),
- (V_CVT_F16_F32_e32 $src)
+ (f64 (fpextend f16:$src)),
+ (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
>;
+// fp_to_fp16 patterns
def : Pat <
- (f16 (fpround f64:$src)),
- (V_CVT_F16_F32_e32 (V_CVT_F32_F64_e32 $src))
+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))),
+ (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod)
>;
def : Pat <
@@ -480,6 +518,16 @@ multiclass FMADPat <ValueType vt, Instruction inst> {
defm : FMADPat <f16, V_MAC_F16_e64>;
defm : FMADPat <f32, V_MAC_F32_e64>;
+class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat<
+ (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod),
+ (VOP3Mods f32:$src1, i32:$src1_mod),
+ (VOP3Mods f32:$src2, i32:$src2_mod))),
+ (inst $src0_mod, $src0, $src1_mod, $src1,
+ $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz>;
+
multiclass SelectPat <ValueType vt, Instruction inst> {
def : Pat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
@@ -578,6 +626,16 @@ def : BitConvert <i32, f32, VGPR_32>;
def : BitConvert <f32, i32, VGPR_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <v2i16, i32, SReg_32>;
+def : BitConvert <i32, v2i16, SReg_32>;
+def : BitConvert <v2f16, i32, SReg_32>;
+def : BitConvert <i32, v2f16, SReg_32>;
+def : BitConvert <v2i16, v2f16, SReg_32>;
+def : BitConvert <v2f16, v2i16, SReg_32>;
+def : BitConvert <v2f16, f32, SReg_32>;
+def : BitConvert <f32, v2f16, SReg_32>;
+def : BitConvert <v2i16, f32, SReg_32>;
+def : BitConvert <f32, v2i16, SReg_32>;
// 64-bit bitcast
def : BitConvert <i64, f64, VReg_64>;
@@ -619,12 +677,20 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
/********** Src & Dst modifiers **********/
/********** =================== **********/
-def : Pat <
- (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
- (f32 FP_ZERO), (f32 FP_ONE)),
- (V_ADD_F32_e64 $src0_modifiers, $src0, 0, (i32 0), 1, $omod)
+
+// If denormals are not enabled, it only impacts the compare of the
+// inputs. The output result is not flushed.
+class ClampPat<Instruction inst, ValueType vt> : Pat <
+ (vt (AMDGPUclamp
+ (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))),
+ (inst i32:$src0_modifiers, vt:$src0,
+ i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod)
>;
+def : ClampPat<V_MAX_F32_e64, f32>;
+def : ClampPat<V_MAX_F64, f64>;
+def : ClampPat<V_MAX_F16_e64, f16>;
+
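
The ClampPat above implements AMDGPUclamp by selecting a v_max of the operand with itself and setting the destination clamp bit. A standalone sketch of the resulting value for ordinary (non-NaN) inputs (illustration only, not LLVM code):

// Sketch only: with the clamp bit set, max(x, x) is clamped to [0.0, 1.0].
// As the comment above notes, denormal mode only affects the input compare;
// NaN inputs follow the hardware clamp rules rather than this sketch.
static float clampToUnit(float x) {
  float m = x;              // max(x, x)
  if (m < 0.0f) m = 0.0f;
  if (m > 1.0f) m = 1.0f;
  return m;
}
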
/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/
@@ -678,6 +744,37 @@ def : Pat <
>;
def : Pat <
+ (fcopysign f16:$src0, f16:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
+>;
+
+def : Pat <
+ (fcopysign f32:$src0, f16:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), $src0,
+ (V_LSHLREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+ (fcopysign f64:$src0, f16:$src1),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (V_LSHLREV_B32_e64 (i32 16), $src1)), sub1)
+>;
+
+def : Pat <
+ (fcopysign f16:$src0, f32:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+ (V_LSHRREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+ (fcopysign f16:$src0, f64:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+ (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
+>;
+
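
All of the fcopysign patterns above lean on v_bfi_b32, which inserts bits selected by a mask from one source over another. A standalone sketch of the idiom with the f16 mask (illustration only, not LLVM code; the helper names are made up):

// Sketch only: v_bfi_b32(mask, a, b) == (a & mask) | (b & ~mask). With mask
// 0x7fff the magnitude bits come from $src0 and the f16 sign bit from $src1.
#include <cstdint>

static uint32_t bfi(uint32_t mask, uint32_t a, uint32_t b) {
  return (a & mask) | (b & ~mask);
}
static uint16_t copysignF16Bits(uint16_t mag, uint16_t sign) {
  return static_cast<uint16_t>(bfi(0x7fffu, mag, sign));
}
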
+def : Pat <
(fneg f16:$src),
(V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
>;
@@ -692,6 +789,25 @@ def : Pat <
(S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
>;
+def : Pat <
+ (fneg v2f16:$src),
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), $src)
+>;
+
+def : Pat <
+ (fabs v2f16:$src),
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), $src)
+>;
+
+// This is really (fneg (fabs v2f16:$src))
+//
+// fabs is not reported as free because there is modifier for it in
+// VOP3P instructions, so it is turned into the bit op.
+def : Pat <
+ (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))),
+ (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
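The packed patterns above operate on both halves of a v2f16 register at once: each half has its own sign bit, at bit 15 and bit 31 of the 32-bit register. A sketch of the masks, illustrative only:

#include <cstdint>

// Two half-precision values packed into one 32-bit word; flip or clear both
// sign bits with a single integer op.
static uint32_t fnegV2F16Bits(uint32_t Packed) { return Packed ^ 0x80008000u; }
static uint32_t fabsV2F16Bits(uint32_t Packed) { return Packed & 0x7fff7fffu; }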
+
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
@@ -759,27 +875,6 @@ def : Pat <
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : Pat <
- (int_AMDGPU_cube v4f32:$src),
- (REG_SEQUENCE VReg_128,
- (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub0,
- (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub1,
- (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub2,
- (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub3)
->;
-
-def : Pat <
(i32 (sext i1:$src0)),
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;
@@ -985,6 +1080,11 @@ def : Pat <
//===----------------------------------------------------------------------===//
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//
+def : Pat <
+ (i32 (AMDGPUfp16_zext f16:$src)),
+ (COPY $src)
+>;
+
def : Pat <
(i32 (trunc i64:$a)),
@@ -1028,24 +1128,72 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
-def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+def : Pat<
+ (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0)
+>;
def : Pat<
- (fcanonicalize f16:$src),
- (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0)
+ (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
>;
def : Pat<
- (fcanonicalize f32:$src),
- (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
+ (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
+ (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0)
>;
def : Pat<
- (fcanonicalize f64:$src),
- (V_MUL_F64 0, CONST.FP64_ONE, 0, $src, 0, 0)
+ (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
+ (V_PK_MUL_F16 SRCMODS.OP_SEL_1, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
+>;
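fcanonicalize continues to be selected as a multiply by 1.0, which leaves the value mathematically unchanged while the hardware produces a canonical encoding for the result; the change here additionally folds source modifiers into that multiply instead of forcing them to zero. A one-line sketch of the idea, under the assumption that running the value through a regular FP op is what canonicalizes it:

// Illustrative only: the multiply is an arithmetic no-op, the canonicalization
// comes from the hardware's handling of the result.
static float canonicalizeF32(float X) { return X * 1.0f; }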
+
+
+// Allow integer inputs
+class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : Pat<
+ (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
+ (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
+>;
+
+def : ExpPattern<AMDGPUexport, i32, EXP>;
+def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
+
+def : Pat <
+ (v2i16 (build_vector i16:$src0, i16:$src1)),
+ (v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
+>;
+
+// With multiple uses of the shift, this will duplicate the shift and
+// increase register pressure.
+def : Pat <
+ (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
+ (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
>;
+def : Pat <
+ (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
+ (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
+ (v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
+>;
+
+// TODO: Should source modifiers be matched to v_pack_b32_f16?
+def : Pat <
+ (v2f16 (build_vector f16:$src0, f16:$src1)),
+ (v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
+>;
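The build_vector patterns above map onto the scalar pack instructions; assuming the usual s_pack semantics (LL takes the low 16 bits of both operands, LH the low half of the first and the high half of the second, HH the high halves of both), the trunc/srl-by-16 operands in the patterns correspond directly to those half selections. A sketch of the assumed behaviour:

#include <cstdint>

// Result layout: bits [15:0] from the first selection, bits [31:16] from the
// second. These mirror s_pack_ll/lh/hh_b32_b16 as assumed above.
static uint32_t packLL(uint32_t A, uint32_t B) {
  return (A & 0xffffu) | ((B & 0xffffu) << 16);
}
static uint32_t packLH(uint32_t A, uint32_t B) {
  return (A & 0xffffu) | (B & 0xffff0000u);
}
static uint32_t packHH(uint32_t A, uint32_t B) {
  return (A >> 16) | (B & 0xffff0000u);
}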
+
+// def : Pat <
+// (v2f16 (scalar_to_vector f16:$src0)),
+// (COPY $src0)
+// >;
+
+// def : Pat <
+// (v2i16 (scalar_to_vector i16:$src0)),
+// (COPY $src0)
+// >;
+
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
@@ -1083,11 +1231,39 @@ def : Pat <
// Miscellaneous Optimization Patterns
//============================================================================//
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// TODO: Also do for 64-bit.
+def : Pat<
+ (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
+>;
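The point of undoing the canonicalization is encoding size: the generic combiner rewrites sub x, c as add x, -c, but the integer inline-immediate range is asymmetric (assumed here to be -16..64), so -c may need a 32-bit literal where c would not. A small sketch of the check, with that range as an assumption:

#include <cstdint>

// Assumed integer inline-immediate range; values outside it cost an extra
// literal dword in the encoding.
static bool isInlineImm(int32_t V) { return V >= -16 && V <= 64; }

// e.g. "add x, -48": -48 is not inline but 48 is, so "s_sub_i32 x, 48" is the
// cheaper form.
static bool subFormIsCheaper(int32_t C) {
  return isInlineImm(C) && !isInlineImm(-C);
}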
+
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
+// This matches 16 permutations of
+// max(min(x, y), min(max(x, y), z))
+class FPMed3Pat<ValueType vt,
+ Instruction med3Inst> : Pat<
+ (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
+ (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : FPMed3Pat<f32, V_MED3_F32>;
+
+let Predicates = [isGFX9] in {
+def : FPMed3Pat<f16, V_MED3_F16>;
+def : IntMed3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
+def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+} // End Predicates = [isGFX9]
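The expression the class matches is the median-of-three identity: for ordered (non-NaN) inputs, max(min(x, y), min(max(x, y), z)) is the middle value however the min/max operands are commuted, which is what gives the equivalent forms the comment counts. A reference sketch, illustrative only:

#include <algorithm>

// For non-NaN inputs this returns the median of X, Y and Z.
static float med3Ref(float X, float Y, float Z) {
  return std::max(std::min(X, Y), std::min(std::max(X, Y), Z));
}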
+
//============================================================================//
// Assembler aliases
//============================================================================//
diff --git a/lib/Target/AMDGPU/SIIntrinsics.td b/lib/Target/AMDGPU/SIIntrinsics.td
index 5da375468713..7b7cf1635050 100644
--- a/lib/Target/AMDGPU/SIIntrinsics.td
+++ b/lib/Target/AMDGPU/SIIntrinsics.td
@@ -14,23 +14,7 @@
let TargetPrefix = "SI", isTarget = 1 in {
- def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-
- def int_SI_export : Intrinsic <[],
- [llvm_i32_ty, // en
- llvm_i32_ty, // vm (FIXME: should be i1)
- llvm_i32_ty, // done (FIXME: should be i1)
- llvm_i32_ty, // tgt
- llvm_i32_ty, // compr (FIXME: should be i1)
- llvm_float_ty, // src0
- llvm_float_ty, // src1
- llvm_float_ty, // src2
- llvm_float_ty], // src3
- []
- >;
-
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
// Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
def int_SI_tbuffer_store : Intrinsic <
@@ -64,146 +48,4 @@ let TargetPrefix = "SI", isTarget = 1 in {
llvm_i32_ty], // tfe(imm)
[IntrReadMem, IntrArgMemOnly]>;
- def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
-
- // Fully-flexible SAMPLE instruction.
- class SampleRaw : Intrinsic <
- [llvm_v4f32_ty], // vdata(VGPR)
- [llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
- llvm_v4i32_ty, // sampler(SGPR)
- llvm_i32_ty, // dmask(imm)
- llvm_i32_ty, // unorm(imm)
- llvm_i32_ty, // r128(imm)
- llvm_i32_ty, // da(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty, // tfe(imm)
- llvm_i32_ty], // lwe(imm)
- [IntrNoMem]>;
-
- // Image instruction without a sampler.
- class Image : Intrinsic <
- [llvm_v4f32_ty], // vdata(VGPR)
- [llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // dmask(imm)
- llvm_i32_ty, // unorm(imm)
- llvm_i32_ty, // r128(imm)
- llvm_i32_ty, // da(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty, // tfe(imm)
- llvm_i32_ty], // lwe(imm)
- [IntrNoMem]>;
-
- // Basic sample
- def int_SI_image_sample : SampleRaw;
- def int_SI_image_sample_cl : SampleRaw;
- def int_SI_image_sample_d : SampleRaw;
- def int_SI_image_sample_d_cl : SampleRaw;
- def int_SI_image_sample_l : SampleRaw;
- def int_SI_image_sample_b : SampleRaw;
- def int_SI_image_sample_b_cl : SampleRaw;
- def int_SI_image_sample_lz : SampleRaw;
- def int_SI_image_sample_cd : SampleRaw;
- def int_SI_image_sample_cd_cl : SampleRaw;
-
- // Sample with comparison
- def int_SI_image_sample_c : SampleRaw;
- def int_SI_image_sample_c_cl : SampleRaw;
- def int_SI_image_sample_c_d : SampleRaw;
- def int_SI_image_sample_c_d_cl : SampleRaw;
- def int_SI_image_sample_c_l : SampleRaw;
- def int_SI_image_sample_c_b : SampleRaw;
- def int_SI_image_sample_c_b_cl : SampleRaw;
- def int_SI_image_sample_c_lz : SampleRaw;
- def int_SI_image_sample_c_cd : SampleRaw;
- def int_SI_image_sample_c_cd_cl : SampleRaw;
-
- // Sample with offsets
- def int_SI_image_sample_o : SampleRaw;
- def int_SI_image_sample_cl_o : SampleRaw;
- def int_SI_image_sample_d_o : SampleRaw;
- def int_SI_image_sample_d_cl_o : SampleRaw;
- def int_SI_image_sample_l_o : SampleRaw;
- def int_SI_image_sample_b_o : SampleRaw;
- def int_SI_image_sample_b_cl_o : SampleRaw;
- def int_SI_image_sample_lz_o : SampleRaw;
- def int_SI_image_sample_cd_o : SampleRaw;
- def int_SI_image_sample_cd_cl_o : SampleRaw;
-
- // Sample with comparison and offsets
- def int_SI_image_sample_c_o : SampleRaw;
- def int_SI_image_sample_c_cl_o : SampleRaw;
- def int_SI_image_sample_c_d_o : SampleRaw;
- def int_SI_image_sample_c_d_cl_o : SampleRaw;
- def int_SI_image_sample_c_l_o : SampleRaw;
- def int_SI_image_sample_c_b_o : SampleRaw;
- def int_SI_image_sample_c_b_cl_o : SampleRaw;
- def int_SI_image_sample_c_lz_o : SampleRaw;
- def int_SI_image_sample_c_cd_o : SampleRaw;
- def int_SI_image_sample_c_cd_cl_o : SampleRaw;
-
- // Basic gather4
- def int_SI_gather4 : SampleRaw;
- def int_SI_gather4_cl : SampleRaw;
- def int_SI_gather4_l : SampleRaw;
- def int_SI_gather4_b : SampleRaw;
- def int_SI_gather4_b_cl : SampleRaw;
- def int_SI_gather4_lz : SampleRaw;
-
- // Gather4 with comparison
- def int_SI_gather4_c : SampleRaw;
- def int_SI_gather4_c_cl : SampleRaw;
- def int_SI_gather4_c_l : SampleRaw;
- def int_SI_gather4_c_b : SampleRaw;
- def int_SI_gather4_c_b_cl : SampleRaw;
- def int_SI_gather4_c_lz : SampleRaw;
-
- // Gather4 with offsets
- def int_SI_gather4_o : SampleRaw;
- def int_SI_gather4_cl_o : SampleRaw;
- def int_SI_gather4_l_o : SampleRaw;
- def int_SI_gather4_b_o : SampleRaw;
- def int_SI_gather4_b_cl_o : SampleRaw;
- def int_SI_gather4_lz_o : SampleRaw;
-
- // Gather4 with comparison and offsets
- def int_SI_gather4_c_o : SampleRaw;
- def int_SI_gather4_c_cl_o : SampleRaw;
- def int_SI_gather4_c_l_o : SampleRaw;
- def int_SI_gather4_c_b_o : SampleRaw;
- def int_SI_gather4_c_b_cl_o : SampleRaw;
- def int_SI_gather4_c_lz_o : SampleRaw;
-
- def int_SI_getlod : SampleRaw;
-
- // Image instrinsics.
- def int_SI_image_load : Image;
- def int_SI_image_load_mip : Image;
- def int_SI_getresinfo : Image;
-
- /* Interpolation Intrinsics */
-
- def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
} // End TargetPrefix = "SI", isTarget = 1
-
-let TargetPrefix = "amdgcn", isTarget = 1 in {
- // Emit 2.5 ulp, no denormal division. Should only be inserted by
- // pass based on !fpmath metadata.
- def int_amdgcn_fdiv_fast : Intrinsic<
- [llvm_float_ty], [llvm_float_ty], [IntrNoMem]
- >;
-
- /* Control flow Intrinsics */
-
- def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
-}
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 99fe96c0be22..933a16646746 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -39,15 +39,27 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -56,39 +68,36 @@ using namespace llvm;
namespace {
class SILoadStoreOptimizer : public MachineFunctionPass {
+
+ typedef struct {
+ MachineBasicBlock::iterator I;
+ MachineBasicBlock::iterator Paired;
+ unsigned EltSize;
+ unsigned Offset0;
+ unsigned Offset1;
+ unsigned BaseOff;
+ bool UseST64;
+ SmallVector<MachineInstr*, 8> InstsToMove;
+ } CombineInfo;
+
private:
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- AliasAnalysis *AA;
-
- static bool offsetsCanBeCombined(unsigned Offset0,
- unsigned Offset1,
- unsigned EltSize);
-
- MachineBasicBlock::iterator findMatchingDSInst(
- MachineBasicBlock::iterator I,
- unsigned EltSize,
- SmallVectorImpl<MachineInstr*> &InstsToMove);
-
- MachineBasicBlock::iterator mergeRead2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove);
-
- MachineBasicBlock::iterator mergeWrite2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove);
+ const SIInstrInfo *TII = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ AliasAnalysis *AA = nullptr;
+
+ static bool offsetsCanBeCombined(CombineInfo &CI);
+
+ bool findMatchingDSInst(CombineInfo &CI);
+
+ MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI);
+
+ MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
public:
static char ID;
- SILoadStoreOptimizer()
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
- AA(nullptr) {}
+ SILoadStoreOptimizer() : MachineFunctionPass(ID) {}
SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
@@ -108,7 +117,7 @@ public:
}
};
-} // End anonymous namespace.
+} // end anonymous namespace.
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
"SI Load / Store Optimizer", false, false)
@@ -141,11 +150,10 @@ static void addDefsToList(const MachineInstr &MI,
}
}
-static bool memAccessesCanBeReordered(
- MachineBasicBlock::iterator A,
- MachineBasicBlock::iterator B,
- const SIInstrInfo *TII,
- llvm::AliasAnalysis * AA) {
+static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
+ MachineBasicBlock::iterator B,
+ const SIInstrInfo *TII,
+ AliasAnalysis * AA) {
return (TII->areMemAccessesTriviallyDisjoint(*A, *B, AA) ||
// RAW or WAR - cannot reorder
// WAW - cannot reorder
@@ -179,7 +187,6 @@ canMoveInstsAcrossMemOp(MachineInstr &MemOp,
ArrayRef<MachineInstr*> InstsToMove,
const SIInstrInfo *TII,
AliasAnalysis *AA) {
-
assert(MemOp.mayLoadOrStore());
for (MachineInstr *InstToMove : InstsToMove) {
@@ -191,47 +198,68 @@ canMoveInstsAcrossMemOp(MachineInstr &MemOp,
return true;
}
-bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
- unsigned Offset1,
- unsigned Size) {
+bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
// XXX - Would the same offset be OK? Is there any reason this would happen or
// be useful?
- if (Offset0 == Offset1)
+ if (CI.Offset0 == CI.Offset1)
return false;
// This won't be valid if the offset isn't aligned.
- if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
+ if ((CI.Offset0 % CI.EltSize != 0) || (CI.Offset1 % CI.EltSize != 0))
return false;
- unsigned EltOffset0 = Offset0 / Size;
- unsigned EltOffset1 = Offset1 / Size;
+ unsigned EltOffset0 = CI.Offset0 / CI.EltSize;
+ unsigned EltOffset1 = CI.Offset1 / CI.EltSize;
+ CI.UseST64 = false;
+ CI.BaseOff = 0;
+
+ // If the offset in elements doesn't fit in 8 bits, we might be able to use
+ // the stride 64 versions.
+ if ((EltOffset0 % 64 == 0) && (EltOffset1 % 64) == 0 &&
+ isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64)) {
+ CI.Offset0 = EltOffset0 / 64;
+ CI.Offset1 = EltOffset1 / 64;
+ CI.UseST64 = true;
+ return true;
+ }
// Check if the new offsets fit in the reduced 8-bit range.
- if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
+ if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1)) {
+ CI.Offset0 = EltOffset0;
+ CI.Offset1 = EltOffset1;
return true;
+ }
- // If the offset in elements doesn't fit in 8-bits, we might be able to use
- // the stride 64 versions.
- if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64) != 0)
- return false;
+ // Try to shift base address to decrease offsets.
+ unsigned OffsetDiff = std::abs((int)EltOffset1 - (int)EltOffset0);
+ CI.BaseOff = std::min(CI.Offset0, CI.Offset1);
+
+ if ((OffsetDiff % 64 == 0) && isUInt<8>(OffsetDiff / 64)) {
+ CI.Offset0 = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64;
+ CI.Offset1 = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64;
+ CI.UseST64 = true;
+ return true;
+ }
+
+ if (isUInt<8>(OffsetDiff)) {
+ CI.Offset0 = EltOffset0 - CI.BaseOff / CI.EltSize;
+ CI.Offset1 = EltOffset1 - CI.BaseOff / CI.EltSize;
+ return true;
+ }
- return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
+ return false;
}
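A worked illustration of the three cases the rewritten logic handles, with EltSize = 4 and made-up byte offsets (not taken from the patch): offsets 0 and 1024 become element offsets 0 and 256, both multiples of 64 whose quotients fit in 8 bits, so the st64 form is used directly; offsets 16 and 32 become elements 4 and 8, which fit the plain 8-bit fields; offsets 4096 and 4100 become elements 1024 and 1025, which fit neither form, so BaseOff = 4096 is folded into the address with an add and the remaining element offsets are 0 and 1. A compressed sketch of the decision order (it mirrors, but is not, the real function):

// Element offsets in; returns true when some encodable ds_read2/write2 form
// exists, trying st64, then the plain form, then a base-pointer adjustment.
static bool canEncode(unsigned Elt0, unsigned Elt1) {
  auto Fits8 = [](unsigned V) { return V < 256; };
  if (Elt0 % 64 == 0 && Elt1 % 64 == 0 && Fits8(Elt0 / 64) && Fits8(Elt1 / 64))
    return true;                              // st64 form, no base change
  if (Fits8(Elt0) && Fits8(Elt1))
    return true;                              // plain form, no base change
  unsigned Diff = Elt0 > Elt1 ? Elt0 - Elt1 : Elt1 - Elt0;
  return (Diff % 64 == 0 && Fits8(Diff / 64)) || Fits8(Diff); // shift the base
}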
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
- unsigned EltSize,
- SmallVectorImpl<MachineInstr*> &InstsToMove) {
- MachineBasicBlock::iterator E = I->getParent()->end();
- MachineBasicBlock::iterator MBBI = I;
+bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
+ MachineBasicBlock::iterator E = CI.I->getParent()->end();
+ MachineBasicBlock::iterator MBBI = CI.I;
++MBBI;
SmallVector<const MachineOperand *, 8> DefsToMove;
- addDefsToList(*I, DefsToMove);
+ addDefsToList(*CI.I, DefsToMove);
for ( ; MBBI != E; ++MBBI) {
-
- if (MBBI->getOpcode() != I->getOpcode()) {
+ if (MBBI->getOpcode() != CI.I->getOpcode()) {
// This is not a matching DS instruction, but we can keep looking as
// long as one of these conditions is met:
@@ -242,14 +270,14 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
if (MBBI->hasUnmodeledSideEffects())
// We can't re-order this instruction with respect to other memory
// operations, so we fail both conditions mentioned above.
- return E;
+ return false;
if (MBBI->mayLoadOrStore() &&
- !memAccessesCanBeReordered(*I, *MBBI, TII, AA)) {
+ !memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA)) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
- InstsToMove.push_back(&*MBBI);
+ CI.InstsToMove.push_back(&*MBBI);
addDefsToList(*MBBI, DefsToMove);
continue;
}
@@ -257,13 +285,13 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
// When we match I with another DS instruction we will be moving I down
// to the location of the matched instruction; any uses of I will need to
// be moved down as well.
- addToListsIfDependent(*MBBI, DefsToMove, InstsToMove);
+ addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove);
continue;
}
// Don't merge volatiles.
if (MBBI->hasOrderedMemoryRef())
- return E;
+ return false;
// Handle a case like
// DS_WRITE_B32 addr, v, idx0
@@ -271,77 +299,67 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
// DS_WRITE_B32 addr, f(w), idx1
// where the DS_READ_B32 ends up in InstsToMove and therefore prevents
// merging of the two writes.
- if (addToListsIfDependent(*MBBI, DefsToMove, InstsToMove))
+ if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
continue;
- int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
- const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
+ int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
+ AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
// Check same base pointer. Be careful of subregisters, which can occur with
// vectors of pointers.
if (AddrReg0.getReg() == AddrReg1.getReg() &&
AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
- int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
+ int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
AMDGPU::OpName::offset);
- unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
- unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+ CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm() & 0xffff;
+ CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+ CI.Paired = MBBI;
// Check both offsets fit in the reduced range.
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
- canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
- return MBBI;
+ if (offsetsCanBeCombined(CI))
+ if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+ return true;
}
// We've found a load/store that we couldn't merge for some reason.
// We could potentially keep looking, but we'd need to make sure that
// it was safe to move I and also all the instruction in InstsToMove
// down past this instruction.
- if (!memAccessesCanBeReordered(*I, *MBBI, TII, AA) || // check if we can move I across MBBI
- !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA) // check if we can move all I's users
- )
+ // Check if we can move I across MBBI and if we can move all I's users.
+ if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
break;
}
- return E;
+ return false;
}
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove) {
- MachineBasicBlock *MBB = I->getParent();
+ CombineInfo &CI) {
+ MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
// cases, like vectors of pointers.
- const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
-
- const MachineOperand *Dest0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst);
- const MachineOperand *Dest1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst);
-
- unsigned Offset0
- = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
- unsigned Offset1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
-
- unsigned NewOffset0 = Offset0 / EltSize;
- unsigned NewOffset1 = Offset1 / EltSize;
- unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
-
- // Prefer the st64 form if we can use it, even if we can fit the offset in the
- // non st64 version. I'm not sure if there's any real reason to do this.
- bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
- if (UseST64) {
- NewOffset0 /= 64;
- NewOffset1 /= 64;
- Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
- }
+ const auto *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
+
+ const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst);
+ const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdst);
+
+ unsigned NewOffset0 = CI.Offset0;
+ unsigned NewOffset1 = CI.Offset1;
+ unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2_B32
+ : AMDGPU::DS_READ2_B64;
+
+ if (CI.UseST64)
+ Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2ST64_B32
+ : AMDGPU::DS_READ2ST64_B64;
- unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
- unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+ unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -356,72 +374,70 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
const MCInstrDesc &Read2Desc = TII->get(Opc);
const TargetRegisterClass *SuperRC
- = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ = (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
- DebugLoc DL = I->getDebugLoc();
- MachineInstrBuilder Read2
- = BuildMI(*MBB, Paired, DL, Read2Desc, DestReg)
- .addOperand(*AddrReg) // addr
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .addMemOperand(*I->memoperands_begin())
- .addMemOperand(*Paired->memoperands_begin());
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseRegFlags = 0;
+ if (CI.BaseOff) {
+ BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BaseRegFlags = RegState::Kill;
+ BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg)
+ .addImm(CI.BaseOff)
+ .addReg(AddrReg->getReg());
+ }
+
+ MachineInstrBuilder Read2 =
+ BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
+ .addReg(BaseReg, BaseRegFlags) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+
(void)Read2;
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
// Copy to the old destination registers.
- BuildMI(*MBB, Paired, DL, CopyDesc)
- .addOperand(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, Paired, DL, CopyDesc)
- .addOperand(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
- moveInstsAfter(Copy1, InstsToMove);
+ moveInstsAfter(Copy1, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(I);
- I->eraseFromParent();
- Paired->eraseFromParent();
+ MachineBasicBlock::iterator Next = std::next(CI.I);
+ CI.I->eraseFromParent();
+ CI.Paired->eraseFromParent();
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
return Next;
}
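For orientation, a hypothetical before/after of the read merge, written here as comments because the exact assembly depends on register allocation (illustrative, not output produced by this patch):

// Before: two independent loads from the same base register.
//   ds_read_b32 v0, v9 offset:16
//   ds_read_b32 v1, v9 offset:32
// After (EltSize = 4, element offsets 4 and 8):
//   ds_read2_b32 v[0:1], v9 offset0:4 offset1:8
// When a BaseOff was chosen, a v_add_i32 of that constant into a fresh base
// register is emitted in front of the ds_read2, and the original destinations
// are recreated with COPYs from the sub-registers of the 64-bit result.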
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove) {
- MachineBasicBlock *MBB = I->getParent();
+ CombineInfo &CI) {
+ MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .add(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
- const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
+ const MachineOperand *Addr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
+ const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
const MachineOperand *Data1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
+ = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
+ unsigned NewOffset0 = CI.Offset0;
+ unsigned NewOffset1 = CI.Offset1;
+ unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2_B32
+ : AMDGPU::DS_WRITE2_B64;
- unsigned Offset0
- = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
- unsigned Offset1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
-
- unsigned NewOffset0 = Offset0 / EltSize;
- unsigned NewOffset1 = Offset1 / EltSize;
- unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
-
- // Prefer the st64 form if we can use it, even if we can fit the offset in the
- // non st64 version. I'm not sure if there's any real reason to do this.
- bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
- if (UseST64) {
- NewOffset0 /= 64;
- NewOffset1 /= 64;
- Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
- }
+ if (CI.UseST64)
+ Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32
+ : AMDGPU::DS_WRITE2ST64_B64;
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -434,24 +450,33 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
"Computed offset doesn't fit");
const MCInstrDesc &Write2Desc = TII->get(Opc);
- DebugLoc DL = I->getDebugLoc();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ unsigned BaseReg = Addr->getReg();
+ unsigned BaseRegFlags = 0;
+ if (CI.BaseOff) {
+ BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BaseRegFlags = RegState::Kill;
+ BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg)
+ .addImm(CI.BaseOff)
+ .addReg(Addr->getReg());
+ }
- MachineInstrBuilder Write2
- = BuildMI(*MBB, Paired, DL, Write2Desc)
- .addOperand(*Addr) // addr
- .addOperand(*Data0) // data0
- .addOperand(*Data1) // data1
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .addMemOperand(*I->memoperands_begin())
- .addMemOperand(*Paired->memoperands_begin());
+ MachineInstrBuilder Write2 =
+ BuildMI(*MBB, CI.Paired, DL, Write2Desc)
+ .addReg(BaseReg, BaseRegFlags) // addr
+ .add(*Data0) // data0
+ .add(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
- moveInstsAfter(Write2, InstsToMove);
+ moveInstsAfter(Write2, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(I);
- I->eraseFromParent();
- Paired->eraseFromParent();
+ MachineBasicBlock::iterator Next = std::next(CI.I);
+ CI.I->eraseFromParent();
+ CI.Paired->eraseFromParent();
DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
return Next;
@@ -472,27 +497,24 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
continue;
}
- SmallVector<MachineInstr*, 8> InstsToMove;
+ CombineInfo CI;
+ CI.I = I;
unsigned Opc = MI.getOpcode();
if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
- unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size,
- InstsToMove);
- if (Match != E) {
+ CI.EltSize = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
+ if (findMatchingDSInst(CI)) {
Modified = true;
- I = mergeRead2Pair(I, Match, Size, InstsToMove);
+ I = mergeRead2Pair(CI);
} else {
++I;
}
continue;
} else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
- unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size,
- InstsToMove);
- if (Match != E) {
+ CI.EltSize = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
+ if (findMatchingDSInst(CI)) {
Modified = true;
- I = mergeWrite2Pair(I, Match, Size, InstsToMove);
+ I = mergeWrite2Pair(CI);
} else {
++I;
}
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 7ed18f27e591..35d3a93d8710 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -51,13 +51,23 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <iterator>
using namespace llvm;
@@ -67,10 +77,10 @@ namespace {
class SILowerControlFlow : public MachineFunctionPass {
private:
- const SIRegisterInfo *TRI;
- const SIInstrInfo *TII;
- LiveIntervals *LIS;
- MachineRegisterInfo *MRI;
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ LiveIntervals *LIS = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
@@ -88,12 +98,7 @@ private:
public:
static char ID;
- SILowerControlFlow() :
- MachineFunctionPass(ID),
- TRI(nullptr),
- TII(nullptr),
- LIS(nullptr),
- MRI(nullptr) {}
+ SILowerControlFlow() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -113,7 +118,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
char SILowerControlFlow::ID = 0;
@@ -175,9 +180,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// Insert a pseudo terminator to help keep the verifier happy. This will also
// be used later when inserting skips.
- MachineInstr *NewBr =
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
- .addOperand(MI.getOperand(2));
+ MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ .add(MI.getOperand(2));
if (!LIS) {
MI.eraseFromParent();
@@ -220,8 +224,9 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
// tied. In order to correctly tie the registers, split this into a copy of
// the src like it does.
unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
- BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
- .addOperand(MI.getOperand(1)); // Saved EXEC
+ MachineInstr *CopyExec =
+ BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
+ .add(MI.getOperand(1)); // Saved EXEC
// This must be inserted before phis and any spill code inserted before the
// else.
@@ -262,6 +267,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
+ LIS->InsertMachineInstrInMaps(*CopyExec);
LIS->InsertMachineInstrInMaps(*OrSaveExec);
LIS->InsertMachineInstrInMaps(*Xor);
@@ -283,10 +289,9 @@ void SILowerControlFlow::emitBreak(MachineInstr &MI) {
const DebugLoc &DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
- MachineInstr *Or =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .addOperand(MI.getOperand(1));
+ MachineInstr *Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .add(MI.getOperand(1));
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *Or);
@@ -306,13 +311,13 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *AndN2 =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addOperand(MI.getOperand(0));
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .add(MI.getOperand(0));
MachineInstr *Branch =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .add(MI.getOperand(1));
if (LIS) {
LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
@@ -328,9 +333,9 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock::iterator InsPt = MBB.begin();
MachineInstr *NewMI =
- BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addOperand(MI.getOperand(0));
+ BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .add(MI.getOperand(0));
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index be2e14fd4623..3680e02da576 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -114,18 +114,18 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
assert(Val == 0 || Val == -1);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
- .addOperand(Dst)
- .addImm(Val);
+ .add(Dst)
+ .addImm(Val);
MI.eraseFromParent();
continue;
}
}
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
- .addOperand(Dst)
- .addImm(0)
- .addImm(-1)
- .addOperand(Src);
+ .add(Dst)
+ .addImm(0)
+ .addImm(-1)
+ .add(Src);
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
@@ -140,14 +140,14 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MRI.getRegClass(DefInst->getOperand(3).getReg()),
&AMDGPU::SGPR_64RegClass)) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
- .addOperand(Dst)
- .addReg(AMDGPU::EXEC)
- .addOperand(DefInst->getOperand(3));
+ .add(Dst)
+ .addReg(AMDGPU::EXEC)
+ .add(DefInst->getOperand(3));
} else {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
- .addOperand(Dst)
- .addOperand(Src)
- .addImm(0);
+ .add(Dst)
+ .add(Src)
+ .addImm(0);
}
MI.eraseFromParent();
}
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index ecd46b95ca6f..8e612d2ddfda 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -20,12 +20,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToVGPR(
- "amdgpu-spill-sgpr-to-vgpr",
- cl::desc("Enable spilling VGPRs to SGPRs"),
- cl::ReallyHidden,
- cl::init(true));
-
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
@@ -47,13 +41,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
PSInputAddr(0),
+ PSInputEnable(0),
ReturnsVoid(true),
FlatWorkGroupSizes(0, 0),
WavesPerEU(0, 0),
DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
LDSWaveSpillSize(0),
- PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
@@ -81,34 +75,48 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateMemoryInputPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
+ FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
+ WavesPerEU = ST.getWavesPerEU(*F);
- PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
+ // Non-entry functions have no special inputs for now.
+ // TODO: Return early for non-entry CCs.
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC == CallingConv::AMDGPU_PS)
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
- if (!AMDGPU::isShader(F->getCallingConv())) {
+ if (AMDGPU::isKernel(CC)) {
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
}
- if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
+ if (ST.debuggerEmitPrologue()) {
+ // Enable everything.
WorkGroupIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
WorkItemIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
+ }
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
if (WorkItemIDZ)
WorkItemIDY = true;
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo.hasStackObjects();
@@ -135,12 +143,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// We don't need to worry about accessing spills with flat instructions.
// TODO: On VI where we must use flat for global, we should be able to omit
// this if it is never used for generic access.
- if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
- ST.isAmdHsaOS())
+ if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
FlatScratchInit = true;
-
- FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
- WavesPerEU = ST.getWavesPerEU(*F);
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
@@ -193,45 +197,60 @@ unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
return PrivateMemoryPtrUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
- MachineFunction *MF,
- unsigned FrameIndex,
- unsigned SubIdx) {
- if (!EnableSpillSGPRToVGPR)
- return SpilledReg();
-
- const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
- Offset += SubIdx * 4;
-
- unsigned LaneVGPRIdx = Offset / (64 * 4);
- unsigned Lane = (Offset / 4) % 64;
-
- struct SpilledReg Spill;
- Spill.Lane = Lane;
-
- if (!LaneVGPRs.count(LaneVGPRIdx)) {
- unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
- *MF);
+/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
+ int FI) {
+ std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
- if (LaneVGPR == AMDGPU::NoRegister)
- // We have no VGPRs left for spilling SGPRs.
- return Spill;
+ // This has already been allocated.
+ if (!SpillLanes.empty())
+ return true;
- LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
-
- // Add this register as live-in to all blocks to avoid machine verifer
- // complaining about use of an undefined physical register.
- for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
- BI != BE; ++BI) {
- BI->addLiveIn(LaneVGPR);
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned WaveSize = ST.getWavefrontSize();
+
+ unsigned Size = FrameInfo.getObjectSize(FI);
+ assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
+ assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+
+ int NumLanes = Size / 4;
+
+ // Make sure to handle the case where a wide SGPR spill may span across two
+ // VGPRs.
+ for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+ unsigned LaneVGPR;
+ unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
+
+ if (VGPRIndex == 0) {
+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (LaneVGPR == AMDGPU::NoRegister) {
+ // We have no VGPRs left for spilling SGPRs. Reset because we won't
+ // partially spill the SGPR to VGPRs.
+ SGPRToVGPRSpills.erase(FI);
+ NumVGPRSpillLanes -= I;
+ return false;
+ }
+
+ SpillVGPRs.push_back(LaneVGPR);
+
+ // Add this register as live-in to all blocks to avoid machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineBasicBlock &BB : MF)
+ BB.addLiveIn(LaneVGPR);
+ } else {
+ LaneVGPR = SpillVGPRs.back();
}
+
+ SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
}
- Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
- return Spill;
+ return true;
+}
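The lane bookkeeping above hands out one 32-bit lane of a reserved VGPR per 4 bytes of the spilled SGPR, reserving a fresh VGPR each time NumVGPRSpillLanes crosses a multiple of the wavefront size, which is why a wide SGPR tuple can straddle two VGPRs. A minimal sketch of the mapping, assuming a 64-lane wave:

#include <utility>
#include <vector>

// Lane N of the spill stream maps to (VGPR index N / 64, lane N % 64); a
// 16-byte SGPR tuple therefore occupies 4 consecutive lanes and may cross a
// VGPR boundary.
static std::vector<std::pair<unsigned, unsigned>>
assignSpillLanes(unsigned FirstFreeLane, unsigned SizeInBytes) {
  std::vector<std::pair<unsigned, unsigned>> Lanes;
  for (unsigned I = 0; I != SizeInBytes / 4; ++I) {
    unsigned N = FirstFreeLane + I;
    Lanes.push_back({N / 64, N % 64});
  }
  return Lanes;
}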
+
+void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+ for (auto &R : SGPRToVGPRSpills)
+ MFI.RemoveStackObject(R.first);
}
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 6fc8d18bceba..a84f3e274f82 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,13 +16,17 @@
#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include <array>
+#include <cassert>
#include <map>
+#include <utility>
namespace llvm {
-class MachineRegisterInfo;
-
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
explicit AMDGPUImagePseudoSourceValue() :
@@ -109,6 +113,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Graphics info.
unsigned PSInputAddr;
+ unsigned PSInputEnable;
+
bool ReturnsVoid;
// A pair of default/requested minimum/maximum flat work group sizes.
@@ -130,8 +136,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
- unsigned PSInputEna;
- std::map<unsigned, unsigned> LaneVGPRs;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
unsigned NumSystemSGPRs;
@@ -182,19 +186,39 @@ private:
public:
struct SpilledReg {
- unsigned VGPR;
- int Lane;
+ unsigned VGPR = AMDGPU::NoRegister;
+ int Lane = -1;
+
+ SpilledReg() = default;
SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
- SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
+
bool hasLane() { return Lane != -1;}
bool hasReg() { return VGPR != AMDGPU::NoRegister;}
};
- // SIMachineFunctionInfo definition
+private:
+ // SGPR->VGPR spilling support.
+ typedef std::pair<unsigned, unsigned> SpillRegMask;
+
+ // Track VGPR + wave index for each subregister of the SGPR spilled to
+ // frameindex key.
+ DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
+ unsigned NumVGPRSpillLanes = 0;
+ SmallVector<unsigned, 2> SpillVGPRs;
+
+public:
SIMachineFunctionInfo(const MachineFunction &MF);
- SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
- unsigned SubIdx);
+
+ ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
+ auto I = SGPRToVGPRSpills.find(FrameIndex);
+ return (I == SGPRToVGPRSpills.end()) ?
+ ArrayRef<SpilledReg>() : makeArrayRef(I->second);
+ }
+
+ bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+ void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
@@ -399,6 +423,10 @@ public:
return PSInputAddr;
}
+ unsigned getPSInputEnable() const {
+ return PSInputEnable;
+ }
+
bool isPSInputAllocated(unsigned Index) const {
return PSInputAddr & (1 << Index);
}
@@ -407,6 +435,10 @@ public:
PSInputAddr |= 1 << Index;
}
+ void markPSInputEnabled(unsigned Index) {
+ PSInputEnable |= 1 << Index;
+ }
+
bool returnsVoid() const {
return ReturnsVoid;
}
@@ -512,6 +544,6 @@ public:
}
};
-} // End namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index da86bbf9dd2a..9d4e677400e6 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -539,21 +539,30 @@ void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
Preds.push_back(Pred);
assert(none_of(Succs,
- [=](SIScheduleBlock *S) { return PredID == S->getID(); }) &&
+ [=](std::pair<SIScheduleBlock*,
+ SIScheduleBlockLinkKind> S) {
+ return PredID == S.first->getID();
+ }) &&
"Loop in the Block Graph!");
}
-void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
+void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
+ SIScheduleBlockLinkKind Kind) {
unsigned SuccID = Succ->getID();
// Check if not already predecessor.
- for (SIScheduleBlock* S : Succs) {
- if (SuccID == S->getID())
+ for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
+ if (SuccID == S.first->getID()) {
+ if (S.second == SIScheduleBlockLinkKind::NoData &&
+ Kind == SIScheduleBlockLinkKind::Data)
+ S.second = Kind;
return;
+ }
}
if (Succ->isHighLatencyBlock())
++NumHighLatencySuccessors;
- Succs.push_back(Succ);
+ Succs.push_back(std::make_pair(Succ, Kind));
+
assert(none_of(Preds,
[=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
"Loop in the Block Graph!");
@@ -573,8 +582,10 @@ void SIScheduleBlock::printDebug(bool full) {
}
dbgs() << "\nSuccessors:\n";
- for (SIScheduleBlock* S : Succs) {
- S->printDebug(false);
+ for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
+ if (S.second == SIScheduleBlockLinkKind::Data)
+ dbgs() << "(Data Dep) ";
+ S.first->printDebug(false);
}
if (Scheduled) {
@@ -651,11 +662,21 @@ void SIScheduleBlockCreator::colorHighLatenciesAlone() {
}
}
+static bool
+hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
+ for (const auto &PredDep : SU.Preds) {
+ if (PredDep.getSUnit() == &FromSU &&
+ PredDep.getKind() == llvm::SDep::Data)
+ return true;
+ }
+ return false;
+}
+
void SIScheduleBlockCreator::colorHighLatenciesGroups() {
unsigned DAGSize = DAG->SUnits.size();
unsigned NumHighLatencies = 0;
unsigned GroupSize;
- unsigned Color = NextReservedID;
+ int Color = NextReservedID;
unsigned Count = 0;
std::set<unsigned> FormingGroup;
@@ -675,35 +696,102 @@ void SIScheduleBlockCreator::colorHighLatenciesGroups() {
else
GroupSize = 4;
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[i];
- if (DAG->IsHighLatencySU[SU->NodeNum]) {
+ for (unsigned SUNum : DAG->TopDownIndex2SU) {
+ const SUnit &SU = DAG->SUnits[SUNum];
+ if (DAG->IsHighLatencySU[SU.NodeNum]) {
unsigned CompatibleGroup = true;
- unsigned ProposedColor = Color;
+ int ProposedColor = Color;
+ std::vector<int> AdditionalElements;
+
+ // We don't want to put two high latency instructions that depend on
+ // each other in the same block. One way would be to check canAddEdge
+ // in both directions, but that is currently not enough because the
+ // high latency order is already enforced there (via links). Instead,
+ // look at the dependencies between the high latency instructions and
+ // deduce whether it is a data dependency or not.
for (unsigned j : FormingGroup) {
- // TODO: Currently CompatibleGroup will always be false,
- // because the graph enforces the load order. This
- // can be fixed, but as keeping the load order is often
- // good for performance that causes a performance hit (both
- // the default scheduler and this scheduler).
- // When this scheduler determines a good load order,
- // this can be fixed.
- if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) ||
- !DAG->canAddEdge(&DAG->SUnits[j], SU))
+ bool HasSubGraph;
+ std::vector<int> SubGraph;
+ // By construction (topological order), if SU and
+ // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
+ // in the parent graph of SU.
+#ifndef NDEBUG
+ SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
+ HasSubGraph);
+ assert(!HasSubGraph);
+#endif
+ SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
+ HasSubGraph);
+ if (!HasSubGraph)
+ continue; // No dependencies between each other
+ else if (SubGraph.size() > 5) {
+ // Too many elements would need to be added to the block.
CompatibleGroup = false;
+ break;
+ }
+ else {
+ // Check the type of dependency
+ for (unsigned k : SubGraph) {
+ // If in the path to join the two instructions,
+ // there is another high latency instruction,
+ // or instructions colored for another block,
+ // abort the merge.
+ if (DAG->IsHighLatencySU[k] ||
+ (CurrentColoring[k] != ProposedColor &&
+ CurrentColoring[k] != 0)) {
+ CompatibleGroup = false;
+ break;
+ }
+ // If one of the SUs in the subgraph depends on the result of SU j,
+ // there'll be a data dependency.
+ if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
+ CompatibleGroup = false;
+ break;
+ }
+ }
+ if (!CompatibleGroup)
+ break;
+ // Same check for the SU
+ if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
+ CompatibleGroup = false;
+ break;
+ }
+ // Add all the required instructions to the block.
+ // These cannot live in another block (they depend,
+ // with an order dependency, on one of the
+ // instructions in the block), and are required for the
+ // high latency instruction we add.
+ AdditionalElements.insert(AdditionalElements.end(),
+ SubGraph.begin(), SubGraph.end());
+ }
}
- if (!CompatibleGroup || ++Count == GroupSize) {
+ if (CompatibleGroup) {
+ FormingGroup.insert(SU.NodeNum);
+ for (unsigned j : AdditionalElements)
+ CurrentColoring[j] = ProposedColor;
+ CurrentColoring[SU.NodeNum] = ProposedColor;
+ ++Count;
+ }
+ // We found an incompatible instruction or have filled a big
+ // enough group -> start a new one.
+ if (!CompatibleGroup) {
FormingGroup.clear();
Color = ++NextReservedID;
- if (!CompatibleGroup) {
- ProposedColor = Color;
- FormingGroup.insert(SU->NodeNum);
- }
+ ProposedColor = Color;
+ FormingGroup.insert(SU.NodeNum);
+ CurrentColoring[SU.NodeNum] = ProposedColor;
+ Count = 0;
+ } else if (Count == GroupSize) {
+ FormingGroup.clear();
+ Color = ++NextReservedID;
+ ProposedColor = Color;
Count = 0;
- } else {
- FormingGroup.insert(SU->NodeNum);
}
- CurrentColoring[SU->NodeNum] = ProposedColor;
}
}
}
@@ -835,6 +923,17 @@ void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::vector<int> PendingColoring = CurrentColoring;
+ assert(DAGSize >= 1 &&
+ CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
+ CurrentTopDownReservedDependencyColoring.size() == DAGSize);
+ // If there is no reserved block at all, do nothing. We don't want
+ // everything in one block.
+ if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
+ CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
+ *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
+ CurrentTopDownReservedDependencyColoring.end()) == 0)
+ return;
+
for (unsigned SUNum : DAG->BottomUpIndex2SU) {
SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
@@ -856,6 +955,9 @@ void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
SUColors.insert(CurrentColoring[Succ->NodeNum]);
SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
}
+ // If there is only one child/parent block, and that block
+ // is not among the ones we are removing in this path, then
+ // merge the instruction into that block.
if (SUColors.size() == 1 && SUColorsPending.size() == 1)
PendingColoring[SU->NodeNum] = *SUColors.begin();
else // TODO: Attribute new colors depending on color
@@ -974,12 +1076,7 @@ void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
for (unsigned SUNum : DAG->BottomUpIndex2SU) {
SUnit *SU = &DAG->SUnits[SUNum];
unsigned color = CurrentColoring[SU->NodeNum];
- std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color);
- if (Pos != ColorCount.end()) {
- ++ColorCount[color];
- } else {
- ColorCount[color] = 1;
- }
+ ++ColorCount[color];
}
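This simplification (and the matching ones for LiveOutRegsNumUsages and LiveRegsConsumers further down) relies on the counter map's operator[] value-initializing a missing entry, so the first increment of a new key goes from 0 to 1 without an explicit find/insert. A two-line sketch of the behaviour being relied on:

#include <map>

// Incrementing a key that is not yet present inserts it with value 0 first,
// so the returned count after one call is 1.
static unsigned bumpCount(std::map<unsigned, unsigned> &Counts, unsigned Key) {
  return ++Counts[Key];
}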
for (unsigned SUNum : DAG->BottomUpIndex2SU) {
@@ -1087,7 +1184,8 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
continue;
if (Node2CurrentBlock[Succ->NodeNum] != SUID)
- CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]);
+ CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
+ SuccDep.isCtrl() ? NoData : Data);
}
for (SDep& PredDep : SU->Preds) {
SUnit *Pred = PredDep.getSUnit();
@@ -1281,10 +1379,8 @@ void SIScheduleBlockCreator::fillStats() {
Block->Height = 0;
else {
unsigned Height = 0;
- for (SIScheduleBlock *Succ : Block->getSuccs()) {
- if (Height < Succ->Height + 1)
- Height = Succ->Height + 1;
- }
+ for (const auto &Succ : Block->getSuccs())
+        Height = std::max(Height, Succ.first->Height + 1);
Block->Height = Height;
}
}
@@ -1331,13 +1427,7 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
continue;
int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
- std::map<unsigned, unsigned>::iterator RegPos =
- LiveOutRegsNumUsages[PredID].find(Reg);
- if (RegPos != LiveOutRegsNumUsages[PredID].end()) {
- ++LiveOutRegsNumUsages[PredID][Reg];
- } else {
- LiveOutRegsNumUsages[PredID][Reg] = 1;
- }
+ ++LiveOutRegsNumUsages[PredID][Reg];
}
}
@@ -1361,6 +1451,24 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
std::set<unsigned> InRegs = DAG->getInRegs();
addLiveRegs(InRegs);
+ // Increase LiveOutRegsNumUsages for blocks
+ // producing registers consumed in another
+ // scheduling region.
+ for (unsigned Reg : DAG->getOutRegs()) {
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ // Do reverse traversal
+ int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
+ SIScheduleBlock *Block = Blocks[ID];
+ const std::set<unsigned> &OutRegs = Block->getOutRegs();
+
+ if (OutRegs.find(Reg) == OutRegs.end())
+ continue;
+
+ ++LiveOutRegsNumUsages[ID][Reg];
+ break;
+ }
+ }
+
// Fill LiveRegsConsumers for regs that were already
// defined before scheduling.
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -1377,12 +1485,8 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
}
}
- if (!Found) {
- if (LiveRegsConsumers.find(Reg) == LiveRegsConsumers.end())
- LiveRegsConsumers[Reg] = 1;
- else
- ++LiveRegsConsumers[Reg];
- }
+ if (!Found)
+ ++LiveRegsConsumers[Reg];
}
}
@@ -1403,6 +1507,7 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
for (SIScheduleBlock* Block : BlocksScheduled) {
dbgs() << ' ' << Block->getID();
}
+ dbgs() << '\n';
);
}
@@ -1464,8 +1569,8 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
VregCurrentUsage, SregCurrentUsage);
if (VregCurrentUsage > maxVregUsage)
maxVregUsage = VregCurrentUsage;
- if (VregCurrentUsage > maxSregUsage)
- maxSregUsage = VregCurrentUsage;
+ if (SregCurrentUsage > maxSregUsage)
+ maxSregUsage = SregCurrentUsage;
DEBUG(
dbgs() << "Picking New Blocks\n";
dbgs() << "Available: ";
@@ -1556,17 +1661,13 @@ void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
}
void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
- for (SIScheduleBlock* Block : Parent->getSuccs()) {
- --BlockNumPredsLeft[Block->getID()];
- if (BlockNumPredsLeft[Block->getID()] == 0) {
- ReadyBlocks.push_back(Block);
- }
- // TODO: Improve check. When the dependency between the high latency
- // instructions and the instructions of the other blocks are WAR or WAW
- // there will be no wait triggered. We would like these cases to not
- // update LastPosHighLatencyParentScheduled.
- if (Parent->isHighLatencyBlock())
- LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled;
+ for (const auto &Block : Parent->getSuccs()) {
+ if (--BlockNumPredsLeft[Block.first->getID()] == 0)
+ ReadyBlocks.push_back(Block.first);
+
+ if (Parent->isHighLatencyBlock() &&
+ Block.second == SIScheduleBlockLinkKind::Data)
+ LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
}
}
@@ -1578,12 +1679,10 @@ void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
LiveOutRegsNumUsages[Block->getID()].begin(),
E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
std::pair<unsigned, unsigned> RegP = *RegI;
- if (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end())
- LiveRegsConsumers[RegP.first] = RegP.second;
- else {
- assert(LiveRegsConsumers[RegP.first] == 0);
- LiveRegsConsumers[RegP.first] += RegP.second;
- }
+ // We produce this register, thus it must not be previously alive.
+ assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
+ LiveRegsConsumers[RegP.first] == 0);
+ LiveRegsConsumers[RegP.first] += RegP.second;
}
if (LastPosHighLatencyParentScheduled[Block->getID()] >
(unsigned)LastPosWaitedHighLatency)
@@ -1825,7 +1924,9 @@ void SIScheduleDAGMI::schedule()
// if VGPR usage is extremely high, try other good performing variants
// which could lead to lower VGPR usage
if (Best.MaxVGPRUsage > 180) {
- std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+ static const std::pair<SISchedulerBlockCreatorVariant,
+ SISchedulerBlockSchedulerVariant>
+ Variants[] = {
{ LatenciesAlone, BlockRegUsageLatency },
// { LatenciesAlone, BlockRegUsage },
{ LatenciesGrouped, BlockLatencyRegUsage },
@@ -1844,7 +1945,9 @@ void SIScheduleDAGMI::schedule()
// if VGPR usage is still extremely high, we may spill. Try other variants
// which are less performing, but that could lead to lower VGPR usage.
if (Best.MaxVGPRUsage > 200) {
- std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+ static const std::pair<SISchedulerBlockCreatorVariant,
+ SISchedulerBlockSchedulerVariant>
+ Variants[] = {
// { LatenciesAlone, BlockRegUsageLatency },
{ LatenciesAlone, BlockRegUsage },
// { LatenciesGrouped, BlockLatencyRegUsage },
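
A side note on the counting simplifications in the scheduler hunks above: replacing the
explicit find/insert sequences with a plain ++Map[Key] is safe because std::map::operator[]
value-initializes the mapped value (zero for unsigned) when the key is missing. A minimal
standalone sketch, not part of the patch, demonstrating the equivalence:

#include <cassert>
#include <map>

int main() {
  std::map<unsigned, unsigned> ColorCount;

  // operator[] inserts a zero-initialized entry for a missing key, so the
  // pre-increment handles both the "first use" and "seen before" cases.
  ++ColorCount[42]; // key missing: inserted as 0, then incremented to 1
  ++ColorCount[42]; // key present: incremented to 2

  assert(ColorCount[42] == 2);
  return 0;
}
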
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h
index 77c07350d325..122d0f67ca8c 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -40,13 +40,12 @@ enum SIScheduleCandReason {
struct SISchedulerCandidate {
// The reason for this candidate.
- SIScheduleCandReason Reason;
+ SIScheduleCandReason Reason = NoCand;
// Set of reasons that apply to multiple candidates.
- uint32_t RepeatReasonSet;
+ uint32_t RepeatReasonSet = 0;
- SISchedulerCandidate()
- : Reason(NoCand), RepeatReasonSet(0) {}
+ SISchedulerCandidate() = default;
bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
@@ -55,6 +54,11 @@ struct SISchedulerCandidate {
class SIScheduleDAGMI;
class SIScheduleBlockCreator;
+enum SIScheduleBlockLinkKind {
+ NoData,
+ Data
+};
+
class SIScheduleBlock {
SIScheduleDAGMI *DAG;
SIScheduleBlockCreator *BC;
@@ -84,8 +88,8 @@ class SIScheduleBlock {
std::set<unsigned> LiveInRegs;
std::set<unsigned> LiveOutRegs;
- bool Scheduled;
- bool HighLatencyBlock;
+ bool Scheduled = false;
+ bool HighLatencyBlock = false;
std::vector<unsigned> HasLowLatencyNonWaitedParent;
@@ -93,14 +97,14 @@ class SIScheduleBlock {
unsigned ID;
std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
- std::vector<SIScheduleBlock*> Succs; // All blocks successors.
- unsigned NumHighLatencySuccessors;
+  // All block successors, and the kind of link.
+ std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
+ unsigned NumHighLatencySuccessors = 0;
public:
SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
unsigned ID):
- DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
- HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
+ DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
~SIScheduleBlock() = default;
@@ -114,10 +118,11 @@ public:
// Add block pred, which has instruction predecessor of SU.
void addPred(SIScheduleBlock *Pred);
- void addSucc(SIScheduleBlock *Succ);
+ void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
- const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+ ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
+ getSuccs() const { return Succs; }
unsigned Height; // Maximum topdown path length to block without outputs
unsigned Depth; // Maximum bottomup path length to block without inputs
@@ -213,9 +218,9 @@ struct SIScheduleBlocks {
};
enum SISchedulerBlockCreatorVariant {
- LatenciesAlone,
- LatenciesGrouped,
- LatenciesAlonePlusConsecutive
+ LatenciesAlone,
+ LatenciesGrouped,
+ LatenciesAlonePlusConsecutive
};
class SIScheduleBlockCreator {
@@ -451,6 +456,7 @@ public:
LiveIntervals *getLIS() { return LIS; }
MachineRegisterInfo *getMRI() { return &MRI; }
const TargetRegisterInfo *getTRI() { return TRI; }
+ ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
SUnit& getEntrySU() { return EntrySU; }
SUnit& getExitSU() { return ExitSU; }
@@ -469,6 +475,14 @@ public:
return InRegs;
}
+ std::set<unsigned> getOutRegs() {
+ std::set<unsigned> OutRegs;
+ for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
+ OutRegs.insert(RegMaskPair.RegUnit);
+ }
+ return OutRegs;
+  }
+
unsigned getVGPRSetID() const { return VGPRSetID; }
unsigned getSGPRSetID() const { return SGPRSetID; }
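
Since successors now carry a link kind, code walking Succs deals with (block, kind) pairs
rather than raw pointers. A minimal standalone sketch, with illustrative stand-in types
rather than the scheduler's own, of traversing such typed edges to compute a block height
as the maximum successor height plus one:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

// Illustrative stand-ins for the scheduler types.
enum class LinkKind { NoData, Data };

struct Block {
  unsigned Height = 0;
  std::vector<std::pair<Block *, LinkKind>> Succs; // successor + link kind
};

// Height of a block is one more than the tallest successor (0 if none).
static unsigned computeHeight(const Block &B) {
  unsigned Height = 0;
  for (const auto &Succ : B.Succs)
    Height = std::max(Height, Succ.first->Height + 1);
  return Height;
}

int main() {
  Block Leaf;                      // Height 0, no successors
  Block Mid;
  Mid.Succs.push_back({&Leaf, LinkKind::Data});
  Mid.Height = computeHeight(Mid); // 1
  Block Top;
  Top.Succs.push_back({&Mid, LinkKind::NoData});
  assert(computeHeight(Top) == 2);
  return 0;
}
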
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
new file mode 100644
index 000000000000..e02c2e3240e8
--- /dev/null
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -0,0 +1,713 @@
+//===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass tries to apply several peephole SDWA patterns.
+///
+/// E.g. original:
+/// V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
+/// V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
+/// V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
+///
+/// Replace:
+/// V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3
+/// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+///
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include <unordered_map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-peephole-sdwa"
+
+STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
+STATISTIC(NumSDWAInstructionsPeepholed,
+ "Number of instruction converted to SDWA.");
+
+namespace {
+
+class SDWAOperand;
+
+class SIPeepholeSDWA : public MachineFunctionPass {
+private:
+ MachineRegisterInfo *MRI;
+ const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
+
+ std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
+
+ Optional<int64_t> foldToImm(const MachineOperand &Op) const;
+
+public:
+ static char ID;
+
+ typedef SmallVector<std::unique_ptr<SDWAOperand>, 4> SDWAOperandsVector;
+
+ SIPeepholeSDWA() : MachineFunctionPass(ID) {
+ initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void matchSDWAOperands(MachineFunction &MF);
+ bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
+
+ StringRef getPassName() const override { return "SI Peephole SDWA"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+class SDWAOperand {
+private:
+ MachineOperand *Target; // Operand that would be used in converted instruction
+  MachineOperand *Replaced; // Operand that would be replaced by Target
+
+public:
+ SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
+ : Target(TargetOp), Replaced(ReplacedOp) {
+ assert(Target->isReg());
+ assert(Replaced->isReg());
+ }
+
+ virtual ~SDWAOperand() {}
+
+ virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
+ virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
+
+ MachineOperand *getTargetOperand() const { return Target; }
+ MachineOperand *getReplacedOperand() const { return Replaced; }
+ MachineInstr *getParentInst() const { return Target->getParent(); }
+ MachineRegisterInfo *getMRI() const {
+ return &getParentInst()->getParent()->getParent()->getRegInfo();
+ }
+};
+
+using namespace AMDGPU::SDWA;
+
+class SDWASrcOperand : public SDWAOperand {
+private:
+ SdwaSel SrcSel;
+ bool Abs;
+ bool Neg;
+ bool Sext;
+
+public:
+ SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
+ SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
+ bool Sext_ = false)
+ : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
+ Neg(Neg_), Sext(Sext_) {}
+
+ virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
+ virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
+
+ SdwaSel getSrcSel() const { return SrcSel; }
+ bool getAbs() const { return Abs; }
+ bool getNeg() const { return Neg; }
+ bool getSext() const { return Sext; }
+
+ uint64_t getSrcMods() const;
+};
+
+class SDWADstOperand : public SDWAOperand {
+private:
+ SdwaSel DstSel;
+ DstUnused DstUn;
+
+public:
+ SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
+ SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
+ : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
+
+ virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
+ virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
+
+ SdwaSel getDstSel() const { return DstSel; }
+ DstUnused getDstUnused() const { return DstUn; }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
+
+char SIPeepholeSDWA::ID = 0;
+
+char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
+
+FunctionPass *llvm::createSIPeepholeSDWAPass() {
+ return new SIPeepholeSDWA();
+}
+
+#ifndef NDEBUG
+
+static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) {
+ switch(Sel) {
+ case BYTE_0: OS << "BYTE_0"; break;
+ case BYTE_1: OS << "BYTE_1"; break;
+ case BYTE_2: OS << "BYTE_2"; break;
+ case BYTE_3: OS << "BYTE_3"; break;
+ case WORD_0: OS << "WORD_0"; break;
+ case WORD_1: OS << "WORD_1"; break;
+ case DWORD: OS << "DWORD"; break;
+ }
+ return OS;
+}
+
+static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
+ switch(Un) {
+ case UNUSED_PAD: OS << "UNUSED_PAD"; break;
+ case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
+ case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
+ }
+ return OS;
+}
+
+static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) {
+ OS << "SDWA src: " << *Src.getTargetOperand()
+ << " src_sel:" << Src.getSrcSel()
+ << " abs:" << Src.getAbs() << " neg:" << Src.getNeg()
+ << " sext:" << Src.getSext() << '\n';
+ return OS;
+}
+
+static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) {
+ OS << "SDWA dst: " << *Dst.getTargetOperand()
+ << " dst_sel:" << Dst.getDstSel()
+ << " dst_unused:" << Dst.getDstUnused() << '\n';
+ return OS;
+}
+
+#endif
+
+static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
+ assert(To.isReg() && From.isReg());
+ To.setReg(From.getReg());
+ To.setSubReg(From.getSubReg());
+ To.setIsUndef(From.isUndef());
+ if (To.isUse()) {
+ To.setIsKill(From.isKill());
+ } else {
+ To.setIsDead(From.isDead());
+ }
+}
+
+static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
+ return LHS.isReg() &&
+ RHS.isReg() &&
+ LHS.getReg() == RHS.getReg() &&
+ LHS.getSubReg() == RHS.getSubReg();
+}
+
+static bool isSubregOf(const MachineOperand &SubReg,
+ const MachineOperand &SuperReg,
+ const TargetRegisterInfo *TRI) {
+
+ if (!SuperReg.isReg() || !SubReg.isReg())
+ return false;
+
+ if (isSameReg(SuperReg, SubReg))
+ return true;
+
+ if (SuperReg.getReg() != SubReg.getReg())
+ return false;
+
+ LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg());
+ LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg());
+ SuperMask |= ~SubMask;
+ return SuperMask.all();
+}
+
+uint64_t SDWASrcOperand::getSrcMods() const {
+ uint64_t Mods = 0;
+ if (Abs || Neg) {
+ assert(!Sext &&
+ "Float and integer src modifiers can't be set simulteniously");
+ Mods |= Abs ? SISrcMods::ABS : 0;
+ Mods |= Neg ? SISrcMods::NEG : 0;
+ } else if (Sext) {
+ Mods |= SISrcMods::SEXT;
+ }
+
+ return Mods;
+}
+
+MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
+  // For an SDWA src operand, the potential instruction is the one that uses
+  // the register defined by the parent instruction.
+ MachineRegisterInfo *MRI = getMRI();
+ MachineOperand *Replaced = getReplacedOperand();
+ assert(Replaced->isReg());
+
+ MachineInstr *PotentialMI = nullptr;
+ for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) {
+    // If this is a use of another subreg of the dst reg, do nothing.
+ if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
+ continue;
+
+    // If there exists a use of a superreg of dst then we should not combine
+    // this operand.
+ if (!isSameReg(PotentialMO, *Replaced))
+ return nullptr;
+
+    // Check that PotentialMI is the only instruction that uses the dst reg.
+ if (PotentialMI == nullptr) {
+ PotentialMI = PotentialMO.getParent();
+ } else if (PotentialMI != PotentialMO.getParent()) {
+ return nullptr;
+ }
+ }
+
+ return PotentialMI;
+}
+
+bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
+  // Find the operand in the instruction that matches the source operand,
+  // replace it with the target operand, and set the corresponding src_sel.
+
+ MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
+ MachineOperand *SrcMods =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+ assert(Src && Src->isReg());
+ if (!isSameReg(*Src, *getReplacedOperand())) {
+ // If this is not src0 then it should be src1
+ Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
+ SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+
+ assert(Src && Src->isReg());
+
+ if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
+ !isSameReg(*Src, *getReplacedOperand())) {
+ // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
+ // src2. This is not allowed.
+ return false;
+ }
+
+ assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);
+ }
+ copyRegOperand(*Src, *getTargetOperand());
+ SrcSel->setImm(getSrcSel());
+ SrcMods->setImm(getSrcMods());
+ getTargetOperand()->setIsKill(false);
+ return true;
+}
+
+MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
+  // For an SDWA dst operand, the potential instruction is the one that
+  // defines the register that this operand uses.
+ MachineRegisterInfo *MRI = getMRI();
+ MachineInstr *ParentMI = getParentInst();
+ MachineOperand *Replaced = getReplacedOperand();
+ assert(Replaced->isReg());
+
+ for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) {
+ if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
+ continue;
+
+ if (!isSameReg(*Replaced, PotentialMO))
+ return nullptr;
+
+    // Check that ParentMI is the only instruction that uses the replaced register.
+ for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) {
+ if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) &&
+ UseMO.getParent() != ParentMI) {
+ return nullptr;
+ }
+ }
+
+    // Due to SSA this should be the only def of the replaced register, so return it.
+ return PotentialMO.getParent();
+ }
+
+ return nullptr;
+}
+
+bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
+ // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
+
+ if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
+ getDstSel() != AMDGPU::SDWA::DWORD) {
+ // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
+ return false;
+ }
+
+ MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ assert(Operand &&
+ Operand->isReg() &&
+ isSameReg(*Operand, *getReplacedOperand()));
+ copyRegOperand(*Operand, *getTargetOperand());
+  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
+ assert(DstSel);
+ DstSel->setImm(getDstSel());
+  MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
+ assert(DstUnused);
+ DstUnused->setImm(getDstUnused());
+
+  // Remove the original instruction because its register definition would
+  // conflict with our new instruction.
+ getParentInst()->eraseFromParent();
+ return true;
+}
+
+Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
+ if (Op.isImm()) {
+ return Op.getImm();
+ }
+
+  // If this is not an immediate then it can be a copy of an immediate value, e.g.:
+ // %vreg1<def> = S_MOV_B32 255;
+ if (Op.isReg()) {
+ for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
+ if (!isSameReg(Op, Def))
+ continue;
+
+ const MachineInstr *DefInst = Def.getParent();
+ if (!TII->isFoldableCopy(*DefInst))
+ return None;
+
+ const MachineOperand &Copied = DefInst->getOperand(1);
+ if (!Copied.isImm())
+ return None;
+
+ return Copied.getImm();
+ }
+ }
+
+ return None;
+}
+
+void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::V_LSHRREV_B32_e32:
+ case AMDGPU::V_ASHRREV_I32_e32:
+ case AMDGPU::V_LSHLREV_B32_e32: {
+ // from: v_lshrrev_b32_e32 v1, 16/24, v0
+ // to SDWA src:v0 src_sel:WORD_1/BYTE_3
+
+ // from: v_ashrrev_i32_e32 v1, 16/24, v0
+ // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
+
+ // from: v_lshlrev_b32_e32 v1, 16/24, v0
+ // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm)
+ break;
+
+ if (*Imm != 16 && *Imm != 24)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+ auto SDWADst = make_unique<SDWADstOperand>(
+ Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
+ SDWAOperands[&MI] = std::move(SDWADst);
+ ++NumSDWAPatternsFound;
+ } else {
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
+            Opcode != AMDGPU::V_LSHRREV_B32_e32);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ }
+ break;
+ }
+
+ case AMDGPU::V_LSHRREV_B16_e32:
+ case AMDGPU::V_ASHRREV_I16_e32:
+ case AMDGPU::V_LSHLREV_B16_e32: {
+ // from: v_lshrrev_b16_e32 v1, 8, v0
+ // to SDWA src:v0 src_sel:BYTE_1
+
+ // from: v_ashrrev_i16_e32 v1, 8, v0
+ // to SDWA src:v0 src_sel:BYTE_1 sext:1
+
+ // from: v_lshlrev_b16_e32 v1, 8, v0
+ // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm || *Imm != 8)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+ auto SDWADst =
+ make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
+ SDWAOperands[&MI] = std::move(SDWADst);
+ ++NumSDWAPatternsFound;
+ } else {
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, BYTE_1, false, false,
+            Opcode != AMDGPU::V_LSHRREV_B16_e32);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ }
+ break;
+ }
+
+ case AMDGPU::V_BFE_I32:
+ case AMDGPU::V_BFE_U32: {
+ // e.g.:
+ // from: v_bfe_u32 v1, v0, 8, 8
+ // to SDWA src:v0 src_sel:BYTE_1
+
+ // offset | width | src_sel
+ // ------------------------
+ // 0 | 8 | BYTE_0
+ // 0 | 16 | WORD_0
+ // 0 | 32 | DWORD ?
+ // 8 | 8 | BYTE_1
+ // 16 | 8 | BYTE_2
+ // 16 | 16 | WORD_1
+ // 24 | 8 | BYTE_3
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ auto Offset = foldToImm(*Src1);
+ if (!Offset)
+ break;
+
+ MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ auto Width = foldToImm(*Src2);
+ if (!Width)
+ break;
+
+ SdwaSel SrcSel = DWORD;
+
+ if (*Offset == 0 && *Width == 8)
+ SrcSel = BYTE_0;
+ else if (*Offset == 0 && *Width == 16)
+ SrcSel = WORD_0;
+ else if (*Offset == 0 && *Width == 32)
+ SrcSel = DWORD;
+ else if (*Offset == 8 && *Width == 8)
+ SrcSel = BYTE_1;
+ else if (*Offset == 16 && *Width == 8)
+ SrcSel = BYTE_2;
+ else if (*Offset == 16 && *Width == 16)
+ SrcSel = WORD_1;
+ else if (*Offset == 24 && *Width == 8)
+ SrcSel = BYTE_3;
+ else
+ break;
+
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src0->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src0, Dst, SrcSel, false, false,
+          Opcode != AMDGPU::V_BFE_U32);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ break;
+ }
+ case AMDGPU::V_AND_B32_e32: {
+ // e.g.:
+ // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
+ // to SDWA src:v0 src_sel:WORD_0/BYTE_0
+
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm)
+ break;
+
+ if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ break;
+ }
+ }
+ }
+ }
+}
+
+bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
+ const SDWAOperandsVector &SDWAOperands) {
+ // Check if this instruction can be converted to SDWA:
+  // 1. Does this opcode support SDWA?
+ if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
+ return false;
+
+  // 2. Are all operands VGPRs?
+ for (const MachineOperand &Operand : MI.explicit_operands()) {
+ if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))
+ return false;
+ }
+
+ // Convert to sdwa
+ int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
+ assert(SDWAOpcode != -1);
+
+ const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
+
+ // Create SDWA version of instruction MI and initialize its operands
+ MachineInstrBuilder SDWAInst =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
+
+  // Copy dst; if it is present in the original, it should also be present in SDWA.
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (Dst) {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
+ SDWAInst.add(*Dst);
+ } else {
+ assert(TII->isVOPC(MI));
+ }
+
+  // Copy src0, initialize src0_modifiers. All SDWA instructions have src0 and
+  // src0_modifiers (except for v_nop_sdwa, but it can't get here).
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ assert(
+ Src0 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
+ SDWAInst.addImm(0);
+ SDWAInst.add(*Src0);
+
+ // Copy src1 if present, initialize src1_modifiers.
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1) {
+ assert(
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
+ SDWAInst.addImm(0);
+ SDWAInst.add(*Src1);
+ } else {
+ assert(TII->isVOP1(MI));
+ }
+
+ if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
+ SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
+    // v_mac_f16/32 has an additional src2 operand tied to vdst.
+ MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ assert(Src2);
+ SDWAInst.add(*Src2);
+ }
+
+ // Initialize clamp.
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
+ SDWAInst.addImm(0);
+
+ // Initialize dst_sel and dst_unused if present
+ if (Dst) {
+ assert(
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
+ SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+ SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
+ }
+
+ // Initialize src0_sel
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
+ SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+
+
+ // Initialize src1_sel if present
+ if (Src1) {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
+ SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+ }
+
+  // Apply all SDWA operand patterns.
+ bool Converted = false;
+ for (auto &Operand : SDWAOperands) {
+ Converted |= Operand->convertToSDWA(*SDWAInst, TII);
+ }
+ if (!Converted) {
+ SDWAInst->eraseFromParent();
+ return false;
+ }
+
+ DEBUG(dbgs() << "Convert instruction:" << MI
+ << "Into:" << *SDWAInst << '\n');
+ ++NumSDWAInstructionsPeepholed;
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ if (!ST.hasSDWA() ||
+ !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9
+ return false;
+ }
+
+ MRI = &MF.getRegInfo();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
+
+ std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
+
+ matchSDWAOperands(MF);
+
+ for (auto &OperandPair : SDWAOperands) {
+ auto &Operand = OperandPair.second;
+ MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
+ if (PotentialMI) {
+ PotentialMatches[PotentialMI].push_back(std::move(Operand));
+ }
+ }
+
+ for (auto &PotentialPair : PotentialMatches) {
+ MachineInstr &PotentialMI = *PotentialPair.first;
+ convertToSDWA(PotentialMI, PotentialPair.second);
+ }
+
+ SDWAOperands.clear();
+ return false;
+}
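
The V_BFE_U32/I32 case above recognizes only the (offset, width) pairs that map onto a
sub-dword select. A standalone sketch of that mapping, using an illustrative enum rather
than the real AMDGPU::SDWA::SdwaSel from SIDefines.h:

#include <cassert>
#include <cstdint>

// Illustrative stand-ins; the real enumerators live in SIDefines.h.
enum Sel { BYTE_0, BYTE_1, BYTE_2, BYTE_3, WORD_0, WORD_1, DWORD, NO_SEL };

// Map a BFE (offset, width) pair to a sub-dword select, mirroring the table
// in the V_BFE case; pairs that are not byte/word aligned get NO_SEL.
static Sel bfeToSel(int64_t Offset, int64_t Width) {
  if (Offset == 0 && Width == 8)   return BYTE_0;
  if (Offset == 0 && Width == 16)  return WORD_0;
  if (Offset == 0 && Width == 32)  return DWORD;
  if (Offset == 8 && Width == 8)   return BYTE_1;
  if (Offset == 16 && Width == 8)  return BYTE_2;
  if (Offset == 16 && Width == 16) return WORD_1;
  if (Offset == 24 && Width == 8)  return BYTE_3;
  return NO_SEL;
}

int main() {
  assert(bfeToSel(8, 8) == BYTE_1); // v_bfe_u32 v1, v0, 8, 8
  assert(bfeToSel(4, 8) == NO_SEL); // not expressible as an SDWA select
  return 0;
}
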
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a1ed5e8441df..36d4df52ff0e 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -24,12 +24,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToSMEM(
- "amdgpu-spill-sgpr-to-smem",
- cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
- cl::init(false));
-
-
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
for (unsigned i = 0; PSets[i] != -1; ++i) {
if (PSets[i] == (int)PSetID)
@@ -49,9 +43,28 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
}
}
-SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
- SGPRPressureSets(getNumRegPressureSets()),
- VGPRPressureSets(getNumRegPressureSets()) {
+static cl::opt<bool> EnableSpillSGPRToSMEM(
+ "amdgpu-spill-sgpr-to-smem",
+ cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
+ cl::init(false));
+
+static cl::opt<bool> EnableSpillSGPRToVGPR(
+ "amdgpu-spill-sgpr-to-vgpr",
+ cl::desc("Enable spilling VGPRs to SGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
+
+SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
+ AMDGPURegisterInfo(),
+ SGPRPressureSets(getNumRegPressureSets()),
+ VGPRPressureSets(getNumRegPressureSets()),
+ SpillSGPRToVGPR(false),
+ SpillSGPRToSMEM(false) {
+ if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
+ SpillSGPRToSMEM = true;
+ else if (EnableSpillSGPRToVGPR)
+ SpillSGPRToVGPR = true;
+
unsigned NumRegPressureSets = getNumRegPressureSets();
SGPRSetID = NumRegPressureSets;
@@ -97,14 +110,18 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
- unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
- unsigned RegCount = getMaxNumSGPRs(MF);
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned RegCount = ST.getMaxNumSGPRs(MF);
unsigned Reg;
// Try to place it in a hole after PrivateSegmentbufferReg.
@@ -129,6 +146,12 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
+ // Reserve the memory aperture registers.
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
+
// Reserve Trap Handler registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::TBA);
reserveRegisterTuples(Reserved, AMDGPU::TMA);
@@ -139,14 +162,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
- unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
}
- unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
+ unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
@@ -253,7 +278,6 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
}
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -263,8 +287,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
.addFrameIndex(FrameIdx);
- BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
- .addReg(UnusedCarry, RegState::Define | RegState::Dead)
+ TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
.addReg(OffsetReg, RegState::Kill)
.addReg(FIReg);
}
@@ -415,14 +438,14 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(Reg, getDefRegState(!IsStore))
- .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
- .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addReg(Reg, getDefRegState(!IsStore))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
return true;
}
@@ -545,11 +568,20 @@ static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
}
-void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int Index,
- RegScavenger *RS) const {
+ RegScavenger *RS,
+ bool OnlyToVGPR) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+ = MFI->getSGPRToVGPRSpills(Index);
+ bool SpillToVGPR = !VGPRSpills.empty();
+ if (OnlyToVGPR && !SpillToVGPR)
+ return false;
+
MachineRegisterInfo &MRI = MF->getRegInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -558,10 +590,11 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
bool IsKill = MI->getOperand(0).isKill();
const DebugLoc &DL = MI->getDebugLoc();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+ bool SpillToSMEM = spillSGPRToSMEM();
+ if (SpillToSMEM && OnlyToVGPR)
+ return false;
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -634,9 +667,9 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
continue;
}
- struct SIMachineFunctionInfo::SpilledReg Spill =
- MFI->getSpilledReg(MF, Index, i);
- if (Spill.hasReg()) {
+ if (SpillToVGPR) {
+ SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
@@ -647,6 +680,10 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// frame index, we should delete the frame index when all references to
// it are fixed.
} else {
+      // XXX - Can the spill to VGPR fail for some subregisters but not others?
+ if (OnlyToVGPR)
+ return false;
+
// Spill SGPR to a frame index.
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -690,22 +727,33 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MI->eraseFromParent();
MFI->addToSpilledSGPRs(NumSubRegs);
+ return true;
}
-void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
int Index,
- RegScavenger *RS) const {
+ RegScavenger *RS,
+ bool OnlyToVGPR) const {
MachineFunction *MF = MI->getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+ = MFI->getSGPRToVGPRSpills(Index);
+ bool SpillToVGPR = !VGPRSpills.empty();
+ if (OnlyToVGPR && !SpillToVGPR)
+ return false;
+
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const DebugLoc &DL = MI->getDebugLoc();
unsigned SuperReg = MI->getOperand(0).getReg();
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+ bool SpillToSMEM = spillSGPRToSMEM();
+ if (SpillToSMEM && OnlyToVGPR)
+ return false;
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -773,10 +821,8 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
continue;
}
- SIMachineFunctionInfo::SpilledReg Spill
- = MFI->getSpilledReg(MF, Index, i);
-
- if (Spill.hasReg()) {
+ if (SpillToVGPR) {
+ SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
auto MIB =
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
@@ -786,6 +832,9 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
if (NumSubRegs > 1)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
} else {
+ if (OnlyToVGPR)
+ return false;
+
// Restore SGPR from a stack slot.
// FIXME: We should use S_LOAD_DWORD here for VI.
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -820,6 +869,32 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
}
MI->eraseFromParent();
+ return true;
+}
+
+/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
+/// a VGPR and the stack slot can be safely eliminated when all other users are
+/// handled.
+bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
+ MachineBasicBlock::iterator MI,
+ int FI,
+ RegScavenger *RS) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S32_SAVE:
+ return spillSGPR(MI, FI, RS, true);
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ return restoreSGPR(MI, FI, RS, true);
+ default:
+ llvm_unreachable("not an SGPR spill instruction");
+ }
}
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
@@ -1156,210 +1231,6 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return AMDGPU::NoRegister;
}
-unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 800;
- return 512;
-}
-
-unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 102;
- return 104;
-}
-
-unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
- const SIMachineFunctionInfo &MFI) const {
- if (MFI.hasFlatScratchInit()) {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 6; // FLAT_SCRATCH, XNACK, VCC (in that order)
-
- if (ST.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
- return 4; // FLAT_SCRATCH, VCC (in that order)
- }
-
- if (ST.isXNACKEnabled())
- return 4; // XNACK, VCC (in that order)
-
- return 2; // VCC.
-}
-
-unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
- unsigned WavesPerEU) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 0;
- case 8: return 81;
- default: return 97;
- }
- } else {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 49;
- case 8: return 57;
- case 7: return 65;
- case 6: return 73;
- case 5: return 81;
- default: return 97;
- }
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
- unsigned WavesPerEU,
- bool Addressable) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- switch (WavesPerEU) {
- case 0: return 80;
- case 10: return 80;
- case 9: return 80;
- case 8: return 96;
- default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
- }
- } else {
- switch (WavesPerEU) {
- case 0: return 48;
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return getNumAddressableSGPRs(ST);
- }
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
- const Function &F = *MF.getFunction();
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-
- // Compute maximum number of SGPRs function can use using default/requested
- // minimum number of waves per execution unit.
- std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
- unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
- unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
-
- // Check if maximum number of SGPRs was explicitly requested using
- // "amdgpu-num-sgpr" attribute.
- if (F.hasFnAttribute("amdgpu-num-sgpr")) {
- unsigned Requested = AMDGPU::getIntegerAttribute(
- F, "amdgpu-num-sgpr", MaxNumSGPRs);
-
- // Make sure requested value does not violate subtarget's specifications.
- if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI)))
- Requested = 0;
-
- // If more SGPRs are required to support the input user/system SGPRs,
- // increase to accommodate them.
- //
- // FIXME: This really ends up using the requested number of SGPRs + number
- // of reserved special registers in total. Theoretically you could re-use
- // the last input registers for these special registers, but this would
- // require a lot of complexity to deal with the weird aliasing.
- unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
- if (Requested && Requested < NumInputSGPRs)
- Requested = NumInputSGPRs;
-
- // Make sure requested value is compatible with values implied by
- // default/requested minimum/maximum number of waves per execution unit.
- if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
- Requested = 0;
- if (WavesPerEU.second &&
- Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
- Requested = 0;
-
- if (Requested)
- MaxNumSGPRs = Requested;
- }
-
- if (ST.hasSGPRInitBug())
- MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
-
- return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
- MaxNumAddressableSGPRs);
-}
-
-unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
- const SISubtarget &ST) const {
- if (ST.debuggerReserveRegs())
- return 4;
- return 0;
-}
-
-unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 25;
- case 8: return 29;
- case 7: return 33;
- case 6: return 37;
- case 5: return 41;
- case 4: return 49;
- case 3: return 65;
- case 2: return 85;
- default: return 129;
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
- switch (WavesPerEU) {
- case 0: return 24;
- case 10: return 24;
- case 9: return 28;
- case 8: return 32;
- case 7: return 36;
- case 6: return 40;
- case 5: return 48;
- case 4: return 64;
- case 3: return 84;
- case 2: return 128;
- default: return getTotalNumVGPRs();
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
- const Function &F = *MF.getFunction();
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-
- // Compute maximum number of VGPRs function can use using default/requested
- // minimum number of waves per execution unit.
- std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
- unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
-
- // Check if maximum number of VGPRs was explicitly requested using
- // "amdgpu-num-vgpr" attribute.
- if (F.hasFnAttribute("amdgpu-num-vgpr")) {
- unsigned Requested = AMDGPU::getIntegerAttribute(
- F, "amdgpu-num-vgpr", MaxNumVGPRs);
-
- // Make sure requested value does not violate subtarget's specifications.
- if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
- Requested = 0;
-
- // Make sure requested value is compatible with values implied by
- // default/requested minimum/maximum number of waves per execution unit.
- if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
- Requested = 0;
- if (WavesPerEU.second &&
- Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
- Requested = 0;
-
- if (Requested)
- MaxNumVGPRs = Requested;
- }
-
- return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
-}
-
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
if (EltSize == 4) {
@@ -1476,3 +1347,62 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
unsigned Reg) const {
return hasVGPRs(getRegClassForReg(MRI, Reg));
}
+
+bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const {
+ unsigned SrcSize = SrcRC->getSize();
+ unsigned DstSize = DstRC->getSize();
+ unsigned NewSize = NewRC->getSize();
+
+  // Do not increase the size of registers beyond a dword; we would need to
+  // allocate adjacent registers and constrain regalloc more than needed.
+
+ // Always allow dword coalescing.
+ if (SrcSize <= 4 || DstSize <= 4)
+ return true;
+
+ return NewSize <= DstSize || NewSize <= SrcSize;
+}
+
+unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
+ *MF.getFunction());
+ switch (RC->getID()) {
+ default:
+ return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
+ case AMDGPU::VGPR_32RegClassID:
+ return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
+ case AMDGPU::SGPR_32RegClassID:
+ return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
+ }
+}
+
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ if (Idx == getVGPRPressureSet())
+ return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
+ const_cast<MachineFunction &>(MF));
+
+ if (Idx == getSGPRPressureSet())
+ return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
+ const_cast<MachineFunction &>(MF));
+
+ return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
+}
+
+const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
+ static const int Empty[] = { -1 };
+
+ if (hasRegUnit(AMDGPU::M0, RegUnit))
+ return Empty;
+ return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
+}
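
A standalone sketch of the coalescing size rule introduced in shouldCoalesce above, with
register class sizes reduced to plain byte counts (an assumption for illustration; the
real hook receives TargetRegisterClass pointers):

#include <cassert>

// Always allow dword (<= 4 byte) coalescing; otherwise never let the merged
// register class grow beyond both of the original classes. Sizes in bytes.
static bool shouldCoalesceSizes(unsigned SrcSize, unsigned DstSize,
                                unsigned NewSize) {
  if (SrcSize <= 4 || DstSize <= 4)
    return true;
  return NewSize <= DstSize || NewSize <= SrcSize;
}

int main() {
  assert(shouldCoalesceSizes(4, 8, 8));   // dword source: always allowed
  assert(shouldCoalesceSizes(8, 8, 8));   // no growth: allowed
  assert(!shouldCoalesceSizes(8, 8, 16)); // would widen both sides: rejected
  return 0;
}
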
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 0bcae7d9840c..679ed229758a 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -21,8 +21,8 @@
namespace llvm {
-class SISubtarget;
class MachineRegisterInfo;
+class SISubtarget;
class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPURegisterInfo {
@@ -31,13 +31,22 @@ private:
unsigned VGPRSetID;
BitVector SGPRPressureSets;
BitVector VGPRPressureSets;
+ bool SpillSGPRToVGPR;
+ bool SpillSGPRToSMEM;
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
void classifyPressureSet(unsigned PSetID, unsigned Reg,
BitVector &PressureSets) const;
-
public:
- SIRegisterInfo();
+ SIRegisterInfo(const SISubtarget &ST);
+
+ bool spillSGPRToVGPR() const {
+ return SpillSGPRToVGPR;
+ }
+
+ bool spillSGPRToSMEM() const {
+ return SpillSGPRToSMEM;
+ }
/// Return the end register initially reserved for the scratch buffer in case
/// spilling is needed.
@@ -78,16 +87,22 @@ public:
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
- void spillSGPR(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS) const;
+  /// If \p OnlyToVGPR is true, succeed only if the SGPR spills to a VGPR.
+ bool spillSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS,
+ bool OnlyToVGPR = false) const;
- void restoreSGPR(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS) const;
+ bool restoreSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS,
+ bool OnlyToVGPR = false) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
+ bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS) const;
+
unsigned getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff;
}
@@ -195,74 +210,23 @@ public:
return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID);
}
- /// \returns SGPR allocation granularity supported by the subtarget.
- unsigned getSGPRAllocGranule() const {
- return 8;
- }
-
- /// \returns Total number of SGPRs supported by the subtarget.
- unsigned getTotalNumSGPRs(const SISubtarget &ST) const;
-
- /// \returns Number of addressable SGPRs supported by the subtarget.
- unsigned getNumAddressableSGPRs(const SISubtarget &ST) const;
-
- /// \returns Number of reserved SGPRs supported by the subtarget.
- unsigned getNumReservedSGPRs(const SISubtarget &ST,
- const SIMachineFunctionInfo &MFI) const;
-
- /// \returns Minimum number of SGPRs that meets given number of waves per
- /// execution unit requirement for given subtarget.
- unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const;
-
- /// \returns Maximum number of SGPRs that meets given number of waves per
- /// execution unit requirement for given subtarget.
- unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU,
- bool Addressable) const;
-
- /// \returns Maximum number of SGPRs that meets number of waves per execution
- /// unit requirement for function \p MF, or number of SGPRs explicitly
- /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
- ///
- /// \returns Value that meets number of waves per execution unit requirement
- /// if explicitly requested value cannot be converted to integer, violates
- /// subtarget's specifications, or does not meet number of waves per execution
- /// unit requirement.
- unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
-
- /// \returns VGPR allocation granularity supported by the subtarget.
- unsigned getVGPRAllocGranule() const {
- return 4;
- }
-
- /// \returns Total number of VGPRs supported by the subtarget.
- unsigned getTotalNumVGPRs() const {
- return 256;
- }
-
- /// \returns Number of reserved VGPRs for debugger use supported by the
- /// subtarget.
- unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const;
+ ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
+ unsigned EltSize) const;
- /// \returns Minimum number of SGPRs that meets given number of waves per
- /// execution unit requirement.
- unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
+ bool shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const override;
- /// \returns Maximum number of VGPRs that meets given number of waves per
- /// execution unit requirement.
- unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const override;
- /// \returns Maximum number of VGPRs that meets number of waves per execution
- /// unit requirement for function \p MF, or number of VGPRs explicitly
- /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
- ///
- /// \returns Value that meets number of waves per execution unit requirement
- /// if explicitly requested value cannot be converted to integer, violates
- /// subtarget's specifications, or does not meet number of waves per execution
- /// unit requirement.
- unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
- ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
- unsigned EltSize) const;
+ const int *getRegUnitPressureSets(unsigned RegUnit) const override;
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 31e714b9f6b9..fc808011cd88 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -44,6 +44,11 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;
+def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
+def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
+def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
+def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
+
// Trap handler registers
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;
@@ -128,7 +133,7 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
// TODO: Do we need to set DwarfRegAlias on register tuples?
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 103))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
@@ -179,7 +184,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 15), 4))]>;
// Trap handler TMP 32-bit registers
-def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
(add (sequence "TTMP%u", 0, 11))> {
let isAllocatable = 0;
}
@@ -197,7 +202,8 @@ def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
(add (decimate (shl TTMP_32, 3), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+// i16/f16 only on VI+
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
@@ -258,19 +264,20 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
- TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
+ TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
let AllocationPriority = 7;
}
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 7;
}
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 7;
}
@@ -319,7 +326,7 @@ def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
let AllocationPriority = 11;
}
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
let AllocationPriority = 12;
@@ -366,7 +373,7 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
let Size = 32;
}
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32)> {
let isAllocatable = 0;
}
@@ -417,6 +424,18 @@ multiclass SIRegOperand <string rc, string MatchName, string opType> {
let OperandType = opType#"_FP64";
let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
}
+
+ def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_V2INT16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
+ let DecoderMethod = "decodeOperand_VSrcV216";
+ }
+
+ def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_V2FP16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
+ let DecoderMethod = "decodeOperand_VSrcV216";
+ }
}
}
diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td
index be27966fd5f1..0f02f5825cb0 100644
--- a/lib/Target/AMDGPU/SISchedule.td
+++ b/lib/Target/AMDGPU/SISchedule.td
@@ -53,6 +53,11 @@ class SISchedMachineModel : SchedMachineModel {
let MicroOpBufferSize = 1;
let IssueWidth = 1;
let PostRAScheduler = 1;
+
+ // FIXME: Approximate 2 * branch cost. Try to hack around bad
+ // early-ifcvt heuristics. These need improvement to avoid the OOE
+ // heuristics.
+ int MispredictPenalty = 20;
}
def SIFullSpeedModel : SISchedMachineModel;
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index dd31dc690840..c5f121757e62 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -497,24 +497,24 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
if (Op32DstIdx != -1) {
// dst
- Inst32.addOperand(MI.getOperand(0));
+ Inst32.add(MI.getOperand(0));
} else {
assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
"Unexpected case");
}
- Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+ Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
const MachineOperand *Src1 =
TII->getNamedOperand(MI, AMDGPU::OpName::src1);
if (Src1)
- Inst32.addOperand(*Src1);
+ Inst32.add(*Src1);
if (Src2) {
int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
if (Op32Src2Idx != -1) {
- Inst32.addOperand(*Src2);
+ Inst32.add(*Src2);
} else {
// In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
// replaced with an implicit read of vcc. This was already added
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 02656483cd74..5b840a14dbc3 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -226,9 +226,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
auto Ld = cast<LoadSDNode>(N);
return Ld->getAlignment() >= 4 &&
- ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
- (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
@@ -293,12 +293,6 @@ def : Pat <
let Predicates = [isVI] in {
-// 1. Offset as 20bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset), 0)
->;
-
def : Pat <
(i64 (readcyclecounter)),
(S_MEMREALTIME)
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index 73cd5774128e..b4adbdd1df07 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -82,6 +82,12 @@ class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
let has_sdst = 0;
}
+class SOP1_0_32R <string opName, list<dag> pattern = []> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_32:$src0),
+ "$src0", pattern> {
+ let has_sdst = 0;
+}
+
class SOP1_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0),
"$sdst, $src0", pattern
@@ -210,7 +216,7 @@ def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
} // End Uses = [M0]
-def S_CBRANCH_JOIN : SOP1_1 <"s_cbranch_join">;
+def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">;
def S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">;
let Defs = [SCC] in {
def S_ABS_I32 : SOP1_32 <"s_abs_i32">;
@@ -428,7 +434,7 @@ def S_BFE_I64 : SOP2_64_32 <"s_bfe_i64">;
def S_CBRANCH_G_FORK : SOP2_Pseudo <
"s_cbranch_g_fork", (outs),
- (ins SReg_64:$src0, SReg_64:$src1),
+ (ins SCSrc_b64:$src0, SCSrc_b64:$src1),
"$src0, $src1"
> {
let has_sdst = 0;
@@ -438,6 +444,22 @@ let Defs = [SCC] in {
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
} // End Defs = [SCC]
+let SubtargetPredicate = isVI in {
+ def S_RFE_RESTORE_B64 : SOP2_Pseudo <
+ "s_rfe_restore_b64", (outs),
+ (ins SSrc_b64:$src0, SSrc_b32:$src1),
+ "$src0, $src1"
+ > {
+ let hasSideEffects = 1;
+ let has_sdst = 0;
+ }
+}
+
+let SubtargetPredicate = isGFX9 in {
+ def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
+ def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
+ def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
+}
//===----------------------------------------------------------------------===//
// SOPK Instructions
@@ -751,6 +773,14 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
let isReturn = 1;
}
+let SubtargetPredicate = isVI in {
+def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> {
+ let simm16 = 0;
+ let isBarrier = 1;
+ let isReturn = 1;
+}
+}
+
let isBranch = 1, SchedRW = [WriteBranch] in {
def S_BRANCH : SOPP <
0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
@@ -792,6 +822,25 @@ def S_CBRANCH_EXECNZ : SOPP <
>;
} // End Uses = [EXEC]
+def S_CBRANCH_CDBGSYS : SOPP <
+ 0x00000017, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys $simm16"
+>;
+
+def S_CBRANCH_CDBGSYS_AND_USER : SOPP <
+ 0x0000001A, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys_and_user $simm16"
+>;
+
+def S_CBRANCH_CDBGSYS_OR_USER : SOPP <
+ 0x00000019, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys_or_user $simm16"
+>;
+
+def S_CBRANCH_CDBGUSER : SOPP <
+ 0x00000018, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbguser $simm16"
+>;
} // End isBranch = 1
} // End isTerminator = 1
@@ -806,9 +855,18 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
let isConvergent = 1;
}
+let SubtargetPredicate = isVI in {
+def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
+ let simm16 = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+}
+
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
+def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
// On SI the documentation says sleep for approximately 64 * low 2
// bits, consistent with the reported maximum of 448. On VI the
@@ -1207,6 +1265,10 @@ def S_BFE_U64_vi : SOP2_Real_vi <0x27, S_BFE_U64>;
def S_BFE_I64_vi : SOP2_Real_vi <0x28, S_BFE_I64>;
def S_CBRANCH_G_FORK_vi : SOP2_Real_vi <0x29, S_CBRANCH_G_FORK>;
def S_ABSDIFF_I32_vi : SOP2_Real_vi <0x2a, S_ABSDIFF_I32>;
+def S_PACK_LL_B32_B16_vi : SOP2_Real_vi <0x32, S_PACK_LL_B32_B16>;
+def S_PACK_LH_B32_B16_vi : SOP2_Real_vi <0x33, S_PACK_LH_B32_B16>;
+def S_PACK_HH_B32_B16_vi : SOP2_Real_vi <0x34, S_PACK_HH_B32_B16>;
+def S_RFE_RESTORE_B64_vi : SOP2_Real_vi <0x2b, S_RFE_RESTORE_B64>;
def S_MOVK_I32_vi : SOPK_Real_vi <0x00, S_MOVK_I32>;
def S_CMOVK_I32_vi : SOPK_Real_vi <0x01, S_CMOVK_I32>;
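Note on the new S_PACK_{LL,LH,HH}_B32_B16 opcodes defined above: they build a 32-bit
result from 16-bit halves of the two scalar sources, with src0 supplying the low half and
src1 the high half of the destination. A minimal C++ sketch of the expected packing, based
on the opcode naming rather than on code in this patch (the helper names are illustrative):

#include <cstdint>

// Assumed semantics: the L/H letters pick the low or high 16-bit half of
// src0 and src1 respectively; src0's half lands in dst[15:0], src1's half
// in dst[31:16]. Treat this as a sketch, not as the ISA definition.
static uint32_t s_pack_ll(uint32_t s0, uint32_t s1) {
  return ((s1 & 0xffffu) << 16) | (s0 & 0xffffu);
}
static uint32_t s_pack_lh(uint32_t s0, uint32_t s1) {
  return (s1 & 0xffff0000u) | (s0 & 0xffffu);
}
static uint32_t s_pack_hh(uint32_t s0, uint32_t s1) {
  return (s1 & 0xffff0000u) | (s0 >> 16);
}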
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5f651d4da5d2..86095a8e1142 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
+//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,32 +6,42 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUBaseInfo.h"
+
#include "AMDGPU.h"
+#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <utility>
-#define GET_SUBTARGETINFO_ENUM
-#include "AMDGPUGenSubtargetInfo.inc"
-#undef GET_SUBTARGETINFO_ENUM
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-#undef GET_REGINFO_ENUM
#define GET_INSTRINFO_NAMED_OPS
-#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
-#undef GET_INSTRINFO_ENUM
namespace {
@@ -56,11 +66,11 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
return (Src & getBitMask(Shift, Width)) >> Shift;
}
-/// \returns Vmcnt bit shift.
-unsigned getVmcntBitShift() { return 0; }
+/// \returns Vmcnt bit shift (lower bits).
+unsigned getVmcntBitShiftLo() { return 0; }
-/// \returns Vmcnt bit width.
-unsigned getVmcntBitWidth() { return 4; }
+/// \returns Vmcnt bit width (lower bits).
+unsigned getVmcntBitWidthLo() { return 4; }
/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }
@@ -74,52 +84,224 @@ unsigned getLgkmcntBitShift() { return 8; }
/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
-} // anonymous namespace
+/// \returns Vmcnt bit shift (higher bits).
+unsigned getVmcntBitShiftHi() { return 14; }
+
+/// \returns Vmcnt bit width (higher bits).
+unsigned getVmcntBitWidthHi() { return 2; }
+
+} // end anonymous namespace
namespace llvm {
namespace AMDGPU {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
+namespace IsaInfo {
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+ // CI.
if (Features.test(FeatureISAVersion7_0_0))
return {7, 0, 0};
-
if (Features.test(FeatureISAVersion7_0_1))
return {7, 0, 1};
-
if (Features.test(FeatureISAVersion7_0_2))
return {7, 0, 2};
+ // VI.
if (Features.test(FeatureISAVersion8_0_0))
return {8, 0, 0};
-
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
-
if (Features.test(FeatureISAVersion8_0_2))
return {8, 0, 2};
-
if (Features.test(FeatureISAVersion8_0_3))
return {8, 0, 3};
-
if (Features.test(FeatureISAVersion8_0_4))
return {8, 0, 4};
-
if (Features.test(FeatureISAVersion8_1_0))
return {8, 1, 0};
- return {0, 0, 0};
+ // GFX9.
+ if (Features.test(FeatureISAVersion9_0_0))
+ return {9, 0, 0};
+ if (Features.test(FeatureISAVersion9_0_1))
+ return {9, 0, 1};
+
+ if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+ return {0, 0, 0};
+ return {7, 0, 0};
+}
+
+unsigned getWavefrontSize(const FeatureBitset &Features) {
+ if (Features.test(FeatureWavefrontSize16))
+ return 16;
+ if (Features.test(FeatureWavefrontSize32))
+ return 32;
+
+ return 64;
+}
+
+unsigned getLocalMemorySize(const FeatureBitset &Features) {
+ if (Features.test(FeatureLocalMemorySize32768))
+ return 32768;
+ if (Features.test(FeatureLocalMemorySize65536))
+ return 65536;
+
+ return 0;
+}
+
+unsigned getEUsPerCU(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ if (N == 1)
+ return 40;
+ N = 40 / N;
+ return std::min(N, 16u);
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
+ return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+}
+
+unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ // FIXME: Need to take scratch memory into account.
+ return 10;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
+ getEUsPerCU(Features)) / getEUsPerCU(Features);
+}
+
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 2048;
+}
+
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
+ getWavefrontSize(Features);
+}
+
+unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 16;
+ return 8;
+}
+
+unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+ return 8;
+}
+
+unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 800;
+ return 512;
+}
+
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
+ if (Features.test(FeatureSGPRInitBug))
+ return FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 102;
+ return 104;
+}
+
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ if (WavesPerEU >= getMaxWavesPerEU(Features))
+ return 0;
+ unsigned MinNumSGPRs =
+ alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
+ getSGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+}
+
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable) {
+ assert(WavesPerEU != 0);
+
+ IsaVersion Version = getIsaVersion(Features);
+ unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
+ getSGPRAllocGranule(Features));
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ if (Version.Major >= 8 && !Addressable)
+ AddressableNumSGPRs = 112;
+ return std::min(MaxNumSGPRs, AddressableNumSGPRs);
+}
+
+unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
+ return getVGPRAllocGranule(Features);
+}
+
+unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+ return 256;
}
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
+ return getTotalNumVGPRs(Features);
+}
+
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ if (WavesPerEU >= getMaxWavesPerEU(Features))
+ return 0;
+ unsigned MinNumVGPRs =
+ alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
+ getVGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+}
+
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
+ getVGPRAllocGranule(Features));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ return std::min(MaxNumVGPRs, AddressableNumVGPRs);
+}
+
+} // end namespace IsaInfo
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features) {
-
- IsaVersion ISA = getIsaVersion(Features);
+ IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
- Header.amd_kernel_code_version_minor = 0;
+ Header.amd_kernel_code_version_minor = 1;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
Header.amd_machine_version_major = ISA.Major;
Header.amd_machine_version_minor = ISA.Minor;
@@ -127,6 +309,11 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
+
+ // If the code object does not support indirect functions, then the value must
+ // be 0xffffffff.
+ Header.call_convention = -1;
+
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
Header.kernarg_segment_alignment = 4;
@@ -161,16 +348,16 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
ELF::SHF_AMDGPU_HSA_AGENT);
}
-bool isGroupSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
-bool isGlobalSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}
-bool isReadOnlySegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -208,7 +395,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Default;
}
if (Strs.second.trim().getAsInteger(0, Ints.second)) {
- if (!OnlyFirstRequired || Strs.second.trim().size()) {
+ if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
Ctx.emitError("can't parse second integer attribute " + Name);
return Default;
}
@@ -217,57 +404,84 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
-unsigned getWaitcntBitMask(IsaVersion Version) {
- unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
- unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
- unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
- return Vmcnt | Expcnt | Lgkmcnt;
-}
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+ unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
+ if (Version.Major < 9)
+ return VmcntLo;
-unsigned getVmcntBitMask(IsaVersion Version) {
- return (1 << getVmcntBitWidth()) - 1;
+ unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(IsaVersion Version) {
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(IsaVersion Version) {
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+ unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+ unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ return Waitcnt | VmcntHi;
+}
+
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+ unsigned VmcntLo =
+ unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return VmcntLo;
+
+ unsigned VmcntHi =
+ unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ VmcntHi <<= getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
- return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt) {
+ Waitcnt =
+ packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ Vmcnt >>= getVmcntBitWidthLo();
+ return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
@@ -296,6 +510,10 @@ bool isCompute(CallingConv::ID cc) {
return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}
+bool isEntryFunctionCC(CallingConv::ID CC) {
+ return true;
+}
+
bool isSI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
@@ -327,13 +545,34 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
return Reg;
}
+unsigned mc2PseudoReg(unsigned Reg) {
+ switch (Reg) {
+ case AMDGPU::FLAT_SCR_ci:
+ case AMDGPU::FLAT_SCR_vi:
+ return FLAT_SCR;
+
+ case AMDGPU::FLAT_SCR_LO_ci:
+ case AMDGPU::FLAT_SCR_LO_vi:
+ return AMDGPU::FLAT_SCR_LO;
+
+ case AMDGPU::FLAT_SCR_HI_ci:
+ case AMDGPU::FLAT_SCR_HI_vi:
+ return AMDGPU::FLAT_SCR_HI;
+
+ default:
+ return Reg;
+ }
+}
+
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
OpType <= AMDGPU::OPERAND_SRC_LAST;
}
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
switch (OpType) {
case AMDGPU::OPERAND_REG_IMM_FP32:
@@ -342,6 +581,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return true;
default:
return false;
@@ -349,6 +589,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
}
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
@@ -392,6 +633,7 @@ unsigned getRegBitWidth(const MCRegisterClass &RC) {
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
@@ -440,7 +682,8 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
}
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
- assert(HasInv2Pi);
+ if (!HasInv2Pi)
+ return false;
if (Literal >= -16 && Literal <= 64)
return true;
@@ -457,5 +700,92 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3118; // 1/2pi
}
-} // End namespace AMDGPU
-} // End namespace llvm
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
+}
+
+bool isUniformMMO(const MachineMemOperand *MMO) {
+ const Value *Ptr = MMO->getValue();
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ if (isSI(ST) || isCI(ST))
+ return ByteOffset >> 2;
+
+ return ByteOffset;
+}
+
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
+ return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
+ isUInt<20>(EncodedOffset);
+}
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
+const unsigned AMDGPUAS::GLOBAL_ADDRESS;
+const unsigned AMDGPUAS::LOCAL_ADDRESS;
+const unsigned AMDGPUAS::PARAM_D_ADDRESS;
+const unsigned AMDGPUAS::PARAM_I_ADDRESS;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
+const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
+namespace llvm {
+namespace AMDGPU {
+
+AMDGPUAS getAMDGPUAS(Triple T) {
+ auto Env = T.getEnvironmentName();
+ AMDGPUAS AS;
+ if (Env == "amdgiz" || Env == "amdgizcl") {
+ AS.FLAT_ADDRESS = 0;
+ AS.PRIVATE_ADDRESS = 5;
+ AS.REGION_ADDRESS = 4;
+ }
+ else {
+ AS.FLAT_ADDRESS = 4;
+ AS.PRIVATE_ADDRESS = 0;
+ AS.REGION_ADDRESS = 5;
+ }
+ return AS;
+}
+
+AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
+ return getAMDGPUAS(M.getTargetTriple());
+}
+
+AMDGPUAS getAMDGPUAS(const Module &M) {
+ return getAMDGPUAS(Triple(M.getTargetTriple()));
+}
+} // namespace AMDGPU
+} // namespace llvm
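The subtle part of the AMDGPUBaseInfo.cpp change above is the split vmcnt field: from gfx9
onwards, vmcnt is six bits wide and is stored as s_waitcnt immediate bits [3:0] plus [15:14],
while expcnt stays in bits [6:4] and lgkmcnt in bits [11:8]. A small standalone sketch of the
same packing (the function names here are illustrative, not the helpers from this patch):

#include <cassert>

// Pack a gfx9-style s_waitcnt immediate: vmcnt[3:0] -> bits [3:0],
// expcnt -> bits [6:4], lgkmcnt -> bits [11:8], vmcnt[5:4] -> bits [15:14].
static unsigned encodeWaitcntGfx9(unsigned Vmcnt, unsigned Expcnt,
                                  unsigned Lgkmcnt) {
  assert(Vmcnt < 64 && Expcnt < 8 && Lgkmcnt < 16);
  return (Vmcnt & 0xf) | (Expcnt << 4) | (Lgkmcnt << 8) | ((Vmcnt >> 4) << 14);
}

// Recover vmcnt by re-joining the two bit ranges of the immediate.
static unsigned decodeVmcntGfx9(unsigned Waitcnt) {
  return (Waitcnt & 0xf) | (((Waitcnt >> 14) & 0x3) << 4);
}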
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index ea5fc366d205..d6c836eb748b 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===//
+//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,39 +10,143 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
-#include "llvm/IR/CallingConv.h"
-
#include "SIDefines.h"
-
-#define GET_INSTRINFO_OPERAND_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRINFO_OPERAND_ENUM
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
+#include <utility>
namespace llvm {
class FeatureBitset;
class Function;
class GlobalValue;
+class MachineMemOperand;
class MCContext;
-class MCInstrDesc;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
+class Triple;
namespace AMDGPU {
+namespace IsaInfo {
-LLVM_READONLY
-int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+enum {
+ // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+ // doesn't spill SGPRs as much as when 80 is set.
+ FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
+};
+/// \brief Instruction set architecture version.
struct IsaVersion {
unsigned Major;
unsigned Minor;
unsigned Stepping;
};
+/// \returns Isa version for given subtarget \p Features.
IsaVersion getIsaVersion(const FeatureBitset &Features);
+
+/// \returns Wavefront size for given subtarget \p Features.
+unsigned getWavefrontSize(const FeatureBitset &Features);
+
+/// \returns Local memory size in bytes for given subtarget \p Features.
+unsigned getLocalMemorySize(const FeatureBitset &Features);
+
+/// \returns Number of execution units per compute unit for given subtarget \p
+/// Features.
+unsigned getEUsPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of work groups per compute unit for given subtarget
+/// \p Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum number of waves per execution unit for given subtarget \p
+/// Features.
+unsigned getMinWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum flat work group size for given subtarget \p Features.
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Maximum flat work group size for given subtarget \p Features.
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Number of waves per work group for given subtarget \p Features and
+/// limited by given \p FlatWorkGroupSize.
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns SGPR allocation granularity for given subtarget \p Features.
+unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns SGPR encoding granularity for given subtarget \p Features.
+unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of SGPRs for given subtarget \p Features.
+unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of SGPRs for given subtarget \p Features.
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable);
+
+/// \returns VGPR allocation granularity for given subtarget \p Features.
+unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns VGPR encoding granularity for given subtarget \p Features.
+unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of VGPRs for given subtarget \p Features.
+unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of VGPRs for given subtarget \p Features.
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+} // end namespace IsaInfo
+
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
MCSection *getHSATextSection(MCContext &Ctx);
@@ -53,9 +157,9 @@ MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
-bool isGroupSegment(const GlobalValue *GV);
-bool isGlobalSegment(const GlobalValue *GV);
-bool isReadOnlySegment(const GlobalValue *GV);
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
@@ -83,64 +187,89 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
-/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(IsaVersion Version);
-
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(IsaVersion Version);
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(IsaVersion Version);
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(IsaVersion Version);
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-/// \p Vmcnt = \p Waitcnt[3:0]
+/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
+/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt);
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-/// Waitcnt[3:0] = \p Vmcnt
-/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
+/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
+/// Waitcnt[6:4] = \p Expcnt
+/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F);
-bool isShader(CallingConv::ID cc);
-bool isCompute(CallingConv::ID cc);
+LLVM_READNONE
+bool isShader(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isCompute(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isEntryFunctionCC(CallingConv::ID CC);
+
+// FIXME: Remove this when calling conventions are cleaned up
+LLVM_READNONE
+inline bool isKernel(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ default:
+ return false;
+ }
+}
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
@@ -150,6 +279,10 @@ bool isVI(const MCSubtargetInfo &STI);
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
+/// \brief Convert hardware register \p Reg to a pseudo register
+LLVM_READNONE
+unsigned mc2PseudoReg(unsigned Reg);
+
/// \brief Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
@@ -188,6 +321,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return 2;
default:
@@ -210,7 +345,21 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+
+bool isUniformMMO(const MachineMemOperand *MMO);
+
+/// \returns The encoding that will be used for \p ByteOffset in the SMRD
+/// offset field.
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
+/// \returns true if this offset is small enough to fit in the SMRD
+/// offset field. \p ByteOffset should be the offset in bytes and
+/// not the encoded offset.
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
} // end namespace AMDGPU
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
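To see how the new IsaInfo helpers declared above compose, here is a rough sketch of the
max-SGPR calculation for a VI-class target, using the constants from the implementation
(800 total SGPRs, allocation granule 16, 102 addressable): divide the total by the wanted
waves per EU, round down to the granule, then clamp to the addressable count. This is purely
illustrative and not code from this patch.

#include <algorithm>
#include <cstdio>

// Mirrors the shape of IsaInfo::getMaxNumSGPRs() for a VI-class target.
static unsigned maxSGPRsForWavesPerEU(unsigned WavesPerEU) {
  const unsigned Total = 800, Granule = 16, Addressable = 102;
  unsigned Max = Total / WavesPerEU / Granule * Granule; // alignDown
  return std::min(Max, Addressable);
}

int main() {
  for (unsigned Waves = 1; Waves <= 10; ++Waves)
    std::printf("waves/EU %2u -> max SGPRs %u\n", Waves,
                maxSGPRsForWavesPerEU(Waves));
  return 0;
}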
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index c55eaab077d1..991408c81c92 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -87,7 +87,7 @@ COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
-// TODO: enable_trap_handler
+COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index 8cae83cd9d1a..1febc6bf8ec2 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -23,18 +23,18 @@ class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
bits<8> vdst;
-
+
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = op;
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
let Inst{31-25} = 0x3f; // encoding
}
-class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
InstSI <P.Outs32, P.Ins32, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
+ SIMCInstr <!if(VOP1Only, opName, opName#"_e32"), SIEncodingFamily.NONE>,
+ MnemonicAlias<!if(VOP1Only, opName, opName#"_e32"), opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -75,6 +75,8 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -83,10 +85,17 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
}
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
- list<dag> ret = !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
+ list<dag> ret =
+ !if(P.HasModifiers,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers,
+ i1:$clamp, i32:$omod))))],
+ !if(P.HasOMod,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
+ i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
+ )
+ );
}
multiclass VOP1Inst <string opName, VOPProfile P,
@@ -96,6 +105,23 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
}
+// Special profile for instructions which have clamp
+// and output modifiers (but have no input modifiers)
+class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
+ VOPProfile<[dstVt, srcVt, untyped, untyped]> {
+
+ let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
+ let Asm64 = "$vdst, $src0$clamp$omod";
+
+ let HasModifiers = 0;
+ let HasClamp = 1;
+ let HasOMod = 1;
+}
+
+def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
+def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
+def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
+
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
@@ -142,24 +168,24 @@ def V_READFIRSTLANE_B32 :
let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
-defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
-defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
-defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
+defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
+defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
+defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
-defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
-defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
+defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
-defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
+defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
+defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
+defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
+defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
-defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteQuarterRate32]
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
@@ -237,7 +263,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
- let Asm64 = getAsm64<1, 1, 0>.ret;
+ let Asm64 = getAsm64<1, 1, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
@@ -258,11 +284,14 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End Uses = [M0, EXEC]
+let SchedRW = [WriteQuarterRate32] in {
+defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
+}
+
// These instruction only exist on SI and CI
let SubtargetPredicate = isSICI in {
let SchedRW = [WriteQuarterRate32] in {
-defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
@@ -297,8 +326,8 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
let SubtargetPredicate = isVI in {
-defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
-defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
+defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
+defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
@@ -326,12 +355,31 @@ def : Pat<
>;
def : Pat<
- (i16 (fp_to_f16 f32:$src)),
+ (i16 (AMDGPUfp_to_f16 f32:$src)),
(V_CVT_F16_F32_e32 $src)
>;
}
+def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+ let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
+ let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
+ let Outs64 = Outs32;
+ let Asm32 = " $vdst, $src0";
+ let Asm64 = "";
+ let Ins64 = (ins);
+}
+
+let SubtargetPredicate = isGFX9 in {
+ let Constraints = "$vdst = $src1, $vdst1 = $src0",
+ DisableEncoding="$vdst1,$src1",
+ SchedRW = [Write64Bit, Write64Bit] in {
+// Never VOP3. Takes as long as 2 v_mov_b32s
+def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>;
+}
+
+} // End SubtargetPredicate = isGFX9
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
@@ -453,6 +501,14 @@ class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
let Inst{31-25} = 0x3f; //encoding
}
+multiclass VOP1Only_Real_vi <bits<10> op> {
+ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ def _vi :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+ }
+}
+
multiclass VOP1_Real_vi <bits<10> op> {
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
def _e32_vi :
@@ -480,6 +536,7 @@ defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
+defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
@@ -547,7 +604,7 @@ defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
-
+defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index 00e5ab3db0b7..2281f338ab45 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -40,7 +40,7 @@ class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
bits<8> vdst;
bits<8> src1;
-
+
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
@@ -93,6 +93,8 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -119,8 +121,7 @@ multiclass VOP2Inst <string opName,
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
- Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
}
// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst
@@ -134,10 +135,10 @@ multiclass VOP2bInst <string opName,
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
- Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
+
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
}
+
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
}
@@ -154,6 +155,7 @@ multiclass VOP2eInst <string opName,
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
}
+
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
}
@@ -179,10 +181,12 @@ class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;
+// FIXME: Remove src2_modifiers. It isn't used, so it is wasting memory
+// and processing time, but it makes it easier to convert to mad.
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
VGPR_32:$src2, // stub argument
@@ -194,6 +198,7 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
let HasSrc2 = 0;
@@ -204,13 +209,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MAC_F16 : VOP_MAC <f16> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, f16>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f16>.ret;
}
def VOP_MAC_F32 : VOP_MAC <f32> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f32>.ret;
}
// Write out to vcc or arbitrary SGPR.
@@ -280,7 +285,7 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
let Outs32 = (outs VGPR_32:$vdst);
let Outs64 = Outs32;
- let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1);
+ let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1);
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
@@ -354,7 +359,7 @@ defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;
@@ -494,6 +499,14 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
>;
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// TODO: Also do for 64-bit.
+def : Pat<
+ (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
+ (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1)
+>;
+
} // End Predicates = [isVI]
//===----------------------------------------------------------------------===//
@@ -566,7 +579,10 @@ defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
defm V_READLANE_B32 : VOP2_Real_si <0x01>;
+
+let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1) in {
defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
+}
defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
@@ -646,7 +662,7 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
-
+
multiclass VOP2_SDWA_Real <bits<6> op> {
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index c2a4d4ba99b1..217a07488853 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -29,6 +29,26 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
ret1));
}
+class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@@ -86,6 +106,14 @@ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
let DstRC = RegisterOperand<VReg_64>;
}
+def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VReg_64>;
+
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
+}
+
//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//
@@ -209,10 +237,8 @@ def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I3
def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
let isCommutable = 1 in {
-def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3_Profile<VOP_I64_I32_I32_I64>>;
-
-// XXX - Does this set VCC?
-def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3_Profile<VOP_I64_I32_I32_I64>>;
+def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
+def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
} // End isCommutable = 1
} // End SubtargetPredicate = isCIVI
@@ -234,12 +260,14 @@ def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
} // End isCommutable = 1
+def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
} // End SubtargetPredicate = isVI
let Predicates = [isVI] in {
-multiclass Tenary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst, SDPatternOperator op3> {
+multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
+ Instruction inst, SDPatternOperator op3> {
def : Pat<
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst i16:$src0, i16:$src1, i16:$src2)
@@ -258,11 +286,26 @@ def : Pat<
>;
}
-defm: Tenary_i16_Pats<mul, add, V_MAD_U16, zext>;
-defm: Tenary_i16_Pats<mul, add, V_MAD_I16, sext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
} // End Predicates = [isVI]
+let SubtargetPredicate = isGFX9 in {
+def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
+def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
+def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
+def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
+def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
+}
+
//===----------------------------------------------------------------------===//
// Target
@@ -351,11 +394,19 @@ multiclass VOP3_Real_ci<bits<9> op> {
}
}
+multiclass VOP3be_Real_ci<bits<9> op> {
+ def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+ let AssemblerPredicates = [isCIOnly];
+ let DecoderNamespace = "CI";
+ }
+}
+
defm V_MQSAD_U16_U8 : VOP3_Real_ci <0x172>;
defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>;
-defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x174>;
-defm V_MAD_U64_U32 : VOP3_Real_ci <0x176>;
-defm V_MAD_I64_I32 : VOP3_Real_ci <0x177>;
+defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>;
+defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>;
+defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
//===----------------------------------------------------------------------===//
// VI
@@ -376,8 +427,8 @@ multiclass VOP3be_Real_vi<bits<10> op> {
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
defm V_MQSAD_U16_U8 : VOP3_Real_vi <0x172>;
-defm V_MAD_U64_U32 : VOP3_Real_vi <0x176>;
-defm V_MAD_I64_I32 : VOP3_Real_vi <0x177>;
+defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
+defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>;
defm V_MAD_LEGACY_F32 : VOP3_Real_vi <0x1c0>;
defm V_MAD_F32 : VOP3_Real_vi <0x1c1>;
@@ -424,6 +475,8 @@ defm V_MAD_F16 : VOP3_Real_vi <0x1ea>;
defm V_MAD_U16 : VOP3_Real_vi <0x1eb>;
defm V_MAD_I16 : VOP3_Real_vi <0x1ec>;
+defm V_PERM_B32 : VOP3_Real_vi <0x1ed>;
+
defm V_FMA_F16 : VOP3_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16 : VOP3_Real_vi <0x1ef>;
@@ -449,3 +502,16 @@ defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>;
+
+defm V_LSHL_ADD_U32 : VOP3_Real_vi <0x1fd>;
+defm V_ADD_LSHL_U32 : VOP3_Real_vi <0x1fe>;
+defm V_ADD3_U32 : VOP3_Real_vi <0x1ff>;
+defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>;
+defm V_AND_OR_B32 : VOP3_Real_vi <0x201>;
+defm V_OR3_B32 : VOP3_Real_vi <0x202>;
+defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
+
+defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
+defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
+defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
+defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
new file mode 100644
index 000000000000..96d343099132
--- /dev/null
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -0,0 +1,82 @@
+//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP3P Classes
+//===----------------------------------------------------------------------===//
+
+class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+ VOP3P_Pseudo<OpName, P,
+ !if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
+>;
+
+// Non-packed instructions that use the VOP3P encoding. i.e. where
+// omod/abs are used.
+class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+ VOP3P_Pseudo<OpName, P,
+ !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
+>;
+
+let isCommutable = 1 in {
+def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
+def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
+def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
+def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum>;
+def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum>;
+
+def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
+def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
+def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, mul>;
+
+def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
+def V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
+def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
+def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
+}
+
+def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshl_rev>;
+def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
+def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
+
+// XXX - Commutable?
+def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+
+
+multiclass VOP3P_Real_vi<bits<10> op> {
+ def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3Pe <op, !cast<VOP3P_Pseudo>(NAME).Pfl> {
+ let AssemblerPredicates = [HasVOP3PInsts];
+ let DecoderNamespace = "VI";
+ }
+}
+
+defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
+defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
+defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
+defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
+defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
+
+defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
+defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
+defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
+defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
+defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
+defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
+defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
+defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
+
+defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
+defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
+defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index 16a456da3c67..a3550a63677b 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -93,6 +93,8 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -165,13 +167,11 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
- def _sdwa : VOPC_SDWA_Pseudo <opName, P>,
- Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
+ def _sdwa : VOPC_SDWA_Pseudo <opName, P> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
let isCompare = 1;
- let isCommutable = 1;
}
}
@@ -563,7 +563,7 @@ multiclass VOPC_CLASS_F16 <string opName> :
VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 0>;
multiclass VOPCX_CLASS_F16 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 1>;
+ VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 1>;
multiclass VOPC_CLASS_F32 <string opName> :
VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 5f72f97d9e28..69906c419db3 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -68,8 +68,9 @@ class VOP3Common <dag outs, dag ins, string asm = "",
let hasPostISelHook = 1;
}
-class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
- InstSI <P.Outs64, P.Ins64, "", pattern>,
+class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
+ bit VOP3Only = 0, bit isVOP3P = 0> :
+ InstSI <P.Outs64, !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64), "", pattern>,
VOP <opName>,
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e64", opName> {
@@ -79,7 +80,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
let UseNamedOperandTable = 1;
string Mnemonic = opName;
- string AsmOperands = P.Asm64;
+ string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);
let Size = 8;
let mayLoad = 0;
@@ -100,23 +101,34 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
let VOP3 = 1;
let VALU = 1;
+ let FPClamp = P.HasFPClamp;
let Uses = [EXEC];
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
!if(!eq(VOP3Only,1),
- "cvtVOP3",
- !if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
+ !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
+ !if(!eq(P.HasModifiers, 1),
+ "cvtVOP3_2_mod",
+ !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "")
+ )
+ );
VOPProfile Pfl = P;
}
+class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
+ VOP3_Pseudo<opName, P, pattern, 1, 1> {
+ let VOP3P = 1;
+}
+
class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let isPseudo = 0;
let isCodeGenOnly = 0;
+ let UseNamedOperandTable = 1;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
@@ -128,8 +140,15 @@ class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
+// XXX - Is there any reason to distinguish this from regular VOP3
+// here?
+class VOP3P_Real<VOP3P_Pseudo ps, int EncodingFamily> :
+ VOP3_Real<ps, EncodingFamily>;
+
class VOP3a<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<9> src0;
@@ -197,6 +216,42 @@ class VOP3be <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
+class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ // neg, neg_hi, op_sel put in srcN_modifiers
+ bits<4> src0_modifiers;
+ bits<9> src0;
+ bits<4> src1_modifiers;
+ bits<9> src1;
+ bits<4> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+
+ let Inst{7-0} = vdst;
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
+
+ let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
+ let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
+ let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
+
+ let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2)
+
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = !if(P.HasSrc0, src0, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+ let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel_hi(0)
+ let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1)
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
+}
+
class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
let Inst{25-17} = op;
}
@@ -250,7 +305,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
VOP <opName>,
SIMCInstr <opName#"_sdwa", SIEncodingFamily.NONE>,
MnemonicAlias <opName#"_sdwa", opName> {
-
+
let isPseudo = 1;
let isCodeGenOnly = 1;
let UseNamedOperandTable = 1;
@@ -261,14 +316,14 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let Size = 8;
let mayLoad = 0;
let mayStore = 0;
- let hasSideEffects = 0;
+ let hasSideEffects = 0;
let VALU = 1;
let SDWA = 1;
let Uses = [EXEC];
-
- let SubtargetPredicate = isVI;
- let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+
+ let SubtargetPredicate = !if(P.HasExt, HasSDWA, DisableInst);
+ let AssemblerPredicate = !if(P.HasExt, HasSDWA, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
@@ -337,8 +392,8 @@ class VOP_DPP <string OpName, VOPProfile P> :
let Size = 8;
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
- let SubtargetPredicate = isVI;
- let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+ let SubtargetPredicate = HasDPP;
+ let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "DPP";
@@ -348,3 +403,4 @@ include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
+include "VOP3PInstructions.td"
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 89859ba063d9..8640c873f441 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -427,13 +427,11 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
unsigned Lane, bool QPR) {
unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
&ARM::DPRRegClass);
- AddDefaultPred(BuildMI(MBB,
- InsertBefore,
- DL,
- TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
- Out)
- .addReg(Reg)
- .addImm(Lane));
+ BuildMI(MBB, InsertBefore, DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
+ .addReg(Reg)
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
return Out;
}
@@ -476,13 +474,11 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
const DebugLoc &DL, unsigned Ssub0,
unsigned Ssub1) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
- AddDefaultPred(BuildMI(MBB,
- InsertBefore,
- DL,
- TII->get(ARM::VEXTd32), Out)
- .addReg(Ssub0)
- .addReg(Ssub1)
- .addImm(1));
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
return Out;
}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index be3048252bbc..39f7988200ea 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,21 +16,21 @@
#define LLVM_LIB_TARGET_ARM_ARM_H
#include "llvm/Support/CodeGen.h"
-#include "ARMBasicBlockInfo.h"
#include <functional>
+#include <vector>
namespace llvm {
class ARMAsmPrinter;
class ARMBaseTargetMachine;
+struct BasicBlockInfo;
class Function;
class FunctionPass;
-class ImmutablePass;
+class MachineBasicBlock;
+class MachineFunction;
class MachineInstr;
class MCInst;
class PassRegistry;
-class TargetLowering;
-class TargetMachine;
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -53,7 +53,8 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+void initializeARMConstantIslandsPass(PassRegistry &);
-} // end namespace llvm;
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARM_H
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 2a090faeee6a..57f9d1c6b610 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -72,8 +72,6 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
"HasHardwareDivideInARM", "true",
"Enable divide instructions in ARM mode">;
-def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
- "Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
"Has data barrier (dmb / dsb) instructions">;
def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
@@ -263,6 +261,12 @@ def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
"Don't use movt/movw pairs for 32-bit "
"imms">;
+def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
//===----------------------------------------------------------------------===//
// ARM ISAs.
@@ -297,8 +301,7 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
FeatureV7Clrex]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureAcquireRelease,
- FeatureT2XtPk]>;
+ [HasV7Ops, FeatureAcquireRelease]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops]>;
@@ -342,7 +345,9 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", []>;
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
- "Qualcomm ARM processors", []>;
+ "Qualcomm Krait processors", []>;
+def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
+ "Qualcomm Kryo processors", []>;
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
@@ -393,8 +398,7 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
- FeatureDSP,
- FeatureT2XtPk]>;
+ FeatureDSP]>;
def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
@@ -415,15 +419,22 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
FeatureNEON,
FeatureDB,
FeatureDSP,
- FeatureAClass,
- FeatureT2XtPk]>;
+ FeatureAClass]>;
+
+def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
+ FeatureNEON,
+ FeatureDB,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureAClass]>;
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
FeatureHWDiv,
- FeatureRClass,
- FeatureT2XtPk]>;
+ FeatureRClass]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
@@ -438,8 +449,7 @@ def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
FeatureDB,
FeatureHWDiv,
FeatureMClass,
- FeatureDSP,
- FeatureT2XtPk]>;
+ FeatureDSP]>;
def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops,
FeatureAClass,
@@ -481,9 +491,6 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
- FeatureHWDiv,
- FeatureHWDivARM,
- FeatureT2XtPk,
FeatureDSP,
FeatureCRC,
FeatureMP,
@@ -603,8 +610,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
FeatureVMLxForwarding,
FeatureMP,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureVirtualization]>;
def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
@@ -636,8 +641,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureTrustZone,
FeatureVMLxForwarding,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization,
FeatureMP]>;
@@ -651,8 +654,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureVFP4,
FeatureMP,
FeatureCheckVLDnAlign,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
@@ -663,8 +664,6 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
FeatureMP,
FeatureVMLxForwarding,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
@@ -759,6 +758,15 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
FeatureFPARMv8,
FeatureD16]>;
+def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
+ FeatureNoMovt]>;
+
+def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
+ FeatureDSP,
+ FeatureFPARMv8,
+ FeatureD16,
+ FeatureVFPOnlySP]>;
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDiv,
FeatureHWDivARM,
@@ -829,6 +837,12 @@ def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
FeatureCrypto,
FeatureCRC]>;
+def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC]>;
+
def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
FeatureFPAO]>;
@@ -838,6 +852,8 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
include "ARMRegisterInfo.td"
+include "ARMRegisterBanks.td"
+
include "ARMCallingConv.td"
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 95db35ce8ffb..eb0d410b596b 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -844,7 +844,7 @@ void ARMAsmPrinter::emitAttributes() {
ARMBuildAttrs::Allowed);
else
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::AllowIEE754);
+ ARMBuildAttrs::AllowIEEE754);
if (STI.allowsUnalignedMem())
ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
@@ -1142,6 +1142,11 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
+ // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
+ // ARM mode tables.
+ EmitAlignment(2);
+
+ // Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
@@ -1255,7 +1260,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
@@ -1291,7 +1296,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
int64_t Offset = 0;
switch (Opc) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::MOVr:
case ARM::tMOVr:
@@ -1346,11 +1351,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
}
} else if (DstReg == ARM::SP) {
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
else {
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 70a3246e34f1..4f5711ca9a79 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -11,34 +11,58 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -168,9 +192,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
} else if (Amt != 0) {
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
@@ -180,17 +203,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
.addReg(OffReg)
.addReg(0)
.addImm(SOOpc)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
} else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
break;
}
case ARMII::AddrMode3 : {
@@ -202,17 +223,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
break;
}
}
@@ -306,7 +325,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Walk backwards from the end of the basic block until the branch is
// analyzed or we give up.
while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
-
// Flag to be raised on unanalyzeable instructions. This is useful in cases
// where we want to clean up on the end of the basic block before we bail
// out.
@@ -381,7 +399,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-
unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
assert(!BytesRemoved && "code size not handled");
@@ -433,20 +450,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
if (!FBB) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else
- BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
if (isThumb)
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
@@ -576,7 +597,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) {
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
-bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
+bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
if (!MI.isPredicable())
return false;
@@ -586,7 +607,7 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
if (!isEligibleForITBlock(&MI))
return false;
- ARMFunctionInfo *AFI =
+ const ARMFunctionInfo *AFI =
MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
if (AFI->isThumb2Function()) {
@@ -601,7 +622,8 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
}
namespace llvm {
-template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
+
+template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
@@ -614,7 +636,8 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
// all definitions of CPSR are dead
return true;
}
-}
+
+} // end namespace llvm
/// GetInstSize - Return the size of the specified MachineInstr.
///
@@ -698,9 +721,8 @@ void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
if (Subtarget.isMClass())
MIB.addImm(0x800);
- AddDefaultPred(MIB);
-
- MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+ MIB.add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
@@ -718,11 +740,9 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
else
MIB.addImm(8);
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
-
- AddDefaultPred(MIB);
-
- MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -733,8 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))));
+ BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
return;
}
@@ -758,7 +780,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::VORRq)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
return;
}
@@ -845,10 +867,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
- Mov = AddDefaultPred(Mov);
+ Mov = Mov.add(predOps(ARMCC::AL));
// MOVr can set CC.
if (Opc == ARM::MOVr)
- Mov = AddDefaultCC(Mov);
+ Mov = Mov.add(condCodeOp());
}
// Add implicit super-register defs and kills to the last instruction.
Mov->addRegisterDefined(DestReg, TRI);
@@ -886,35 +908,44 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (RC->getSize()) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
- MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
-
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
- .addFrameIndex(FI).addMemOperand(MMO));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
}
@@ -925,15 +956,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
@@ -942,15 +976,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -963,15 +999,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -982,10 +1020,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -1068,19 +1106,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (RC->getSize()) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB;
@@ -1088,14 +1135,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
- MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
-
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
// Fallback to LDM instruction, which has existed since the dawn of
// time.
- MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
- .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
}
@@ -1108,13 +1156,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
@@ -1122,14 +1173,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1142,14 +1195,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1162,10 +1217,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1248,7 +1303,7 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
: isThumb1 ? ARM::tLDMIA_UPD
: ARM::LDMIA_UPD))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(1));
} else {
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
}
@@ -1257,17 +1312,17 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
: isThumb1 ? ARM::tSTMIA_UPD
: ARM::STMIA_UPD))
- .addOperand(MI->getOperand(0));
+ .add(MI->getOperand(0));
} else {
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
}
- AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
- AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+ LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL));
+ STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL));
// Sort the scratch registers into ascending order.
const TargetRegisterInfo &TRI = getRegisterInfo();
- llvm::SmallVector<unsigned, 6> ScratchRegs;
+ SmallVector<unsigned, 6> ScratchRegs;
for(unsigned I = 5; I < MI->getNumOperands(); ++I)
ScratchRegs.push_back(MI->getOperand(I).getReg());
std::sort(ScratchRegs.begin(), ScratchRegs.end(),
@@ -1285,7 +1340,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
BB->erase(MI);
}
-
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
@@ -1346,7 +1400,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(ARM::VMOVD));
MI.getOperand(0).setReg(DstRegD);
MI.getOperand(1).setReg(SrcRegD);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
@@ -1735,25 +1789,17 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
}
}
-
- // Attempt to estimate the relative costs of predication versus branching.
- // Here we scale up each component of UnpredCost to avoid precision issue when
- // scaling NumCycles by Probability.
- const unsigned ScalingUpFactor = 1024;
- unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
- UnpredCost += ScalingUpFactor; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
-
- return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
+ return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
+ MBB, 0, 0, Probability);
}
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB,
+isProfitableToIfCvt(MachineBasicBlock &,
unsigned TCycles, unsigned TExtra,
- MachineBasicBlock &FMBB,
+ MachineBasicBlock &,
unsigned FCycles, unsigned FExtra,
BranchProbability Probability) const {
- if (!TCycles || !FCycles)
+ if (!TCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
@@ -1793,7 +1839,6 @@ ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
}
-
unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
@@ -1920,25 +1965,25 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
- NewMI.addOperand(DefMI->getOperand(i));
+ NewMI.add(DefMI->getOperand(i));
unsigned CondCode = MI.getOperand(3).getImm();
if (Invert)
NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
else
NewMI.addImm(CondCode);
- NewMI.addOperand(MI.getOperand(4));
+ NewMI.add(MI.getOperand(4));
// DefMI is not the -S version that sets CPSR, so add an optional %noreg.
if (NewMI->hasOptionalDef())
- AddDefaultCC(NewMI);
+ NewMI.add(condCodeOp());
// The output register value when the predicate is false is an implicit
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
FalseReg.setImplicit();
- NewMI.addOperand(FalseReg);
+ NewMI.add(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// Update SeenMIs set: register newly created MI and erase removed DefMI.
@@ -1983,6 +2028,16 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
+ {ARM::tADDSi3, ARM::tADDi3},
+ {ARM::tADDSi8, ARM::tADDi8},
+ {ARM::tADDSrr, ARM::tADDrr},
+ {ARM::tADCS, ARM::tADC},
+
+ {ARM::tSUBSi3, ARM::tSUBi3},
+ {ARM::tSUBSi8, ARM::tSUBi8},
+ {ARM::tSUBSrr, ARM::tSUBrr},
+ {ARM::tSBCS, ARM::tSBC},
+
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
{ARM::t2ADDSrs, ARM::t2ADDrs},
@@ -2011,9 +2066,10 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
- .addReg(BaseReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
return;
}
@@ -2033,9 +2089,11 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
// Build the new ADD / SUB.
unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
BaseReg = DestReg;
}
}
@@ -2154,7 +2212,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// Add the complete list back in.
MachineInstrBuilder MIB(MF, &*MI);
for (int i = RegList.size() - 1; i >= 0; --i)
- MIB.addOperand(RegList[i]);
+ MIB.add(RegList[i]);
return true;
}
@@ -2213,33 +2271,30 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned NumBits = 0;
unsigned Scale = 1;
switch (AddrMode) {
- case ARMII::AddrMode_i12: {
+ case ARMII::AddrMode_i12:
ImmIdx = FrameRegIdx + 1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = 12;
break;
- }
- case ARMII::AddrMode2: {
+ case ARMII::AddrMode2:
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 12;
break;
- }
- case ARMII::AddrMode3: {
+ case ARMII::AddrMode3:
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 8;
break;
- }
case ARMII::AddrMode4:
case ARMII::AddrMode6:
// Can't fold any offset even if it's zero.
return false;
- case ARMII::AddrMode5: {
+ case ARMII::AddrMode5:
ImmIdx = FrameRegIdx+1;
InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
@@ -2247,7 +2302,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
NumBits = 8;
Scale = 4;
break;
- }
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -2401,6 +2455,63 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
return false;
}
+static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::tLSLri:
+ case ARM::tLSRri:
+ case ARM::tLSLrr:
+ case ARM::tLSRrr:
+ case ARM::tSUBrr:
+ case ARM::tADDrr:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tMUL:
+ IsThumb1 = true;
+ LLVM_FALLTHROUGH;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri:
+ case ARM::t2LSRri:
+ case ARM::t2LSRrr:
+ case ARM::t2LSLri:
+ case ARM::t2LSLrr:
+ return true;
+ }
+}
+
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
@@ -2462,6 +2573,41 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return false;
}
+ bool IsThumb1 = false;
+ if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
+ return false;
+
+ // We also want to do this peephole for cases like this: if (a*b == 0),
+ // and optimise away the CMP instruction from the generated code sequence:
+ // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
+ // resulting from the select instruction, but these MOVS instructions for
+ // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
+ // However, if we only have MOVS instructions in between the CMP and the
+ // other instruction (the MULS in this example), then the CPSR is dead so we
+ // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
+ // reordering and then continue the analysis hoping we can eliminate the
+ // CMP. This peephole works on the vregs, so is still in SSA form. As a
+ // consequence, the movs won't redefine/kill the MUL operands which would
+ // make this reordering illegal.
+ if (MI && IsThumb1) {
+ --I;
+ bool CanReorder = true;
+ const bool HasStmts = I != E;
+ for (; I != E; --I) {
+ if (I->getOpcode() != ARM::tMOVi8) {
+ CanReorder = false;
+ break;
+ }
+ }
+ if (HasStmts && CanReorder) {
+ MI = MI->removeFromParent();
+ E = CmpInstr;
+ CmpInstr.getParent()->insert(E, MI);
+ }
+ I = CmpInstr;
+ E = MI;
+ }
+
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
const TargetRegisterInfo *TRI = &getRegisterInfo();
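The reordering peephole added above is easier to follow with a concrete case. The sketch below is illustrative only: the source function, the register assignments and the exact MOV count are invented for the example, not taken from the patch.

  // Schematic example of the situation the new code targets.
  bool isZeroProduct(int a, int b) { return a * b == 0; }

  // Thumb1 (v6-M) selection for the return expression, before the peephole:
  //   MULS r0, r1, r0   ; product, already sets N/Z
  //   MOVS r2, #1       ; select operands - each MOVS also sets the flags
  //   MOVS r3, #0
  //   CMP  r0, #0       ; recomputes flags the MULS already produced
  // Only tMOVi8 instructions separate the MULS from the CMP, so the MULS is
  // moved down next to the CMP, giving MOVS, MOVS, MULS, CMP, after which the
  // usual analysis below can drop the CMP:
  //   MOVS r2, #1
  //   MOVS r3, #0
  //   MULS r0, r1, r0
  //   CMP  r0, #0       ; now eliminable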
@@ -2497,183 +2643,128 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
if (isPredicated(*MI))
return false;
- bool IsThumb1 = false;
- switch (MI->getOpcode()) {
- default: break;
- case ARM::tLSLri:
- case ARM::tLSRri:
- case ARM::tLSLrr:
- case ARM::tLSRrr:
- case ARM::tSUBrr:
- case ARM::tADDrr:
- case ARM::tADDi3:
- case ARM::tADDi8:
- case ARM::tSUBi3:
- case ARM::tSUBi8:
- IsThumb1 = true;
- LLVM_FALLTHROUGH;
- case ARM::RSBrr:
- case ARM::RSBri:
- case ARM::RSCrr:
- case ARM::RSCri:
- case ARM::ADDrr:
- case ARM::ADDri:
- case ARM::ADCrr:
- case ARM::ADCri:
- case ARM::SUBrr:
- case ARM::SUBri:
- case ARM::SBCrr:
- case ARM::SBCri:
- case ARM::t2RSBri:
- case ARM::t2ADDrr:
- case ARM::t2ADDri:
- case ARM::t2ADCrr:
- case ARM::t2ADCri:
- case ARM::t2SUBrr:
- case ARM::t2SUBri:
- case ARM::t2SBCrr:
- case ARM::t2SBCri:
- case ARM::ANDrr:
- case ARM::ANDri:
- case ARM::t2ANDrr:
- case ARM::t2ANDri:
- case ARM::ORRrr:
- case ARM::ORRri:
- case ARM::t2ORRrr:
- case ARM::t2ORRri:
- case ARM::EORrr:
- case ARM::EORri:
- case ARM::t2EORrr:
- case ARM::t2EORri:
- case ARM::t2LSRri:
- case ARM::t2LSRrr:
- case ARM::t2LSLri:
- case ARM::t2LSLrr: {
- // Scan forward for the use of CPSR
- // When checking against MI: if it's a conditional code that requires
- // checking of the V bit or C bit, then this is not safe to do.
- // It is safe to remove CmpInstr if CPSR is redefined or killed.
- // If we are done with the basic block, we need to check whether CPSR is
- // live-out.
- SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
- OperandsToUpdate;
- bool isSafe = false;
- I = CmpInstr;
- E = CmpInstr.getParent()->end();
- while (!isSafe && ++I != E) {
- const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands();
- !isSafe && IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
- isSafe = true;
- break;
- }
- if (!MO.isReg() || MO.getReg() != ARM::CPSR)
- continue;
- if (MO.isDef()) {
- isSafe = true;
- break;
- }
- // Condition code is after the operand before CPSR except for VSELs.
- ARMCC::CondCodes CC;
- bool IsInstrVSel = true;
- switch (Instr.getOpcode()) {
- default:
- IsInstrVSel = false;
- CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
- break;
- case ARM::VSELEQD:
- case ARM::VSELEQS:
- CC = ARMCC::EQ;
- break;
- case ARM::VSELGTD:
- case ARM::VSELGTS:
- CC = ARMCC::GT;
- break;
- case ARM::VSELGED:
- case ARM::VSELGES:
- CC = ARMCC::GE;
- break;
- case ARM::VSELVSS:
- case ARM::VSELVSD:
- CC = ARMCC::VS;
- break;
- }
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code that requires
+ // checking of the V bit or C bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
+ bool isSafe = false;
+ I = CmpInstr;
+ E = CmpInstr.getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
- if (Sub) {
- ARMCC::CondCodes NewCC = getSwappedCondition(CC);
- if (NewCC == ARMCC::AL)
- return false;
- // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
- // on CMP needs to be updated to be based on SUB.
- // Push the condition code operands to OperandsToUpdate.
- // If it is safe to remove CmpInstr, the condition code of these
- // operands will be modified.
- if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg) {
- // VSel doesn't support condition code update.
- if (IsInstrVSel)
- return false;
- OperandsToUpdate.push_back(
- std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
- }
- } else {
- // No Sub, so this is x = <op> y, z; cmp x, 0.
- switch (CC) {
- case ARMCC::EQ: // Z
- case ARMCC::NE: // Z
- case ARMCC::MI: // N
- case ARMCC::PL: // N
- case ARMCC::AL: // none
- // CPSR can be used multiple times, we should continue.
- break;
- case ARMCC::HS: // C
- case ARMCC::LO: // C
- case ARMCC::VS: // V
- case ARMCC::VC: // V
- case ARMCC::HI: // C Z
- case ARMCC::LS: // C Z
- case ARMCC::GE: // N V
- case ARMCC::LT: // N V
- case ARMCC::GT: // Z N V
- case ARMCC::LE: // Z N V
- // The instruction uses the V bit or C bit which is not safe.
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
return false;
- }
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
- }
- }
-
- // If CPSR is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if (!isSafe) {
- MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(ARM::CPSR))
+ } else {
+ // No Sub, so this is x = <op> y, z; cmp x, 0.
+ switch (CC) {
+ case ARMCC::EQ: // Z
+ case ARMCC::NE: // Z
+ case ARMCC::MI: // N
+ case ARMCC::PL: // N
+ case ARMCC::AL: // none
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::HS: // C
+ case ARMCC::LO: // C
+ case ARMCC::VS: // V
+ case ARMCC::VC: // V
+ case ARMCC::HI: // C Z
+ case ARMCC::LS: // C Z
+ case ARMCC::GE: // N V
+ case ARMCC::LT: // N V
+ case ARMCC::GT: // Z N V
+ case ARMCC::LE: // Z N V
+ // The instruction uses the V bit or C bit which is not safe.
return false;
+ }
+ }
}
+ }
- // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
- // set CPSR so this is represented as an explicit output)
- if (!IsThumb1) {
- MI->getOperand(5).setReg(ARM::CPSR);
- MI->getOperand(5).setIsDef(true);
- }
- assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
- CmpInstr.eraseFromParent();
-
- // Modify the condition code of operands in OperandsToUpdate.
- // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
- // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
- for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
- OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
- return true;
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr.getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
}
+
+ // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
+ // set CPSR so this is represented as an explicit output)
+ if (!IsThumb1) {
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
}
-
- return false;
+ assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
+ CmpInstr.eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+
+ return true;
}
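The long condition-code switch in the scan above encodes a simple rule: a user of the flags may keep them only if its predicate reads nothing beyond N and Z, because the arithmetic instruction is not guaranteed to produce the same C and V bits as the compare. A condensed restatement (a sketch assuming the ARMCC::CondCodes enum, not code from the patch):

  // Sketch: a predicate may reuse the defining instruction's flags only if
  // it ignores the carry (C) and overflow (V) bits.
  static bool usesOnlyNZ(ARMCC::CondCodes CC) {
    switch (CC) {
    case ARMCC::EQ: // Z
    case ARMCC::NE: // Z
    case ARMCC::MI: // N
    case ARMCC::PL: // N
    case ARMCC::AL: // reads no flags
      return true;
    default:        // HS/LO/HI/LS read C; VS/VC/GE/LT/GT/LE read V
      return false;
    }
  }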
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
@@ -2728,7 +2819,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
switch (UseOpc) {
default: break;
case ARM::ADDrr:
- case ARM::SUBrr: {
+ case ARM::SUBrr:
if (UseOpc == ARM::SUBrr && Commute)
return false;
@@ -2744,9 +2835,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
break;
- }
case ARM::ORRrr:
- case ARM::EORrr: {
+ case ARM::EORrr:
if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
@@ -2757,9 +2847,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
case ARM::EORrr: NewUseOpc = ARM::EORri; break;
}
break;
- }
case ARM::t2ADDrr:
- case ARM::t2SUBrr: {
+ case ARM::t2SUBrr:
if (UseOpc == ARM::t2SUBrr && Commute)
return false;
@@ -2775,9 +2864,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
break;
- }
case ARM::t2ORRrr:
- case ARM::t2EORrr: {
+ case ARM::t2EORrr:
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
@@ -2789,7 +2877,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
break;
}
- }
}
}
@@ -2797,11 +2884,12 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
bool isKill = UseMI.getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
- AddDefaultCC(
- AddDefaultPred(BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
- get(NewUseOpc), NewReg)
- .addReg(Reg1, getKillRegState(isKill))
- .addImm(SOImmValV1)));
+ BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
+ NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
UseMI.setDesc(get(NewUseOpc));
UseMI.getOperand(1).setReg(NewReg);
UseMI.getOperand(1).setIsKill();
@@ -3413,7 +3501,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDMDB:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
- LdmBypass = 1;
+ LdmBypass = true;
DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
break;
}
@@ -3888,12 +3976,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDRs:
case ARM::t2LDRBs:
case ARM::t2LDRHs:
- case ARM::t2LDRSHs: {
+ case ARM::t2LDRSHs:
// Thumb2 mode: lsl 0-3 only.
Latency -= 2;
break;
}
- }
}
if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
@@ -4180,14 +4267,14 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
- MIB.addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
}
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
- AddDefaultPred(MIB);
+ MIB.addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .add(predOps(ARMCC::AL));
}
bool
@@ -4222,6 +4309,7 @@ enum ARMExeDomain {
ExeVFP = 1,
ExeNEON = 2
};
+
//
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//
@@ -4345,8 +4433,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
// Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
MI.setDesc(get(ARM::VORRd));
- AddDefaultPred(
- MIB.addReg(DstReg, RegState::Define).addReg(SrcReg).addReg(SrcReg));
+ MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .add(predOps(ARMCC::AL));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
@@ -4366,9 +4456,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
// Note that DSrc has been widened and the other lane may be undef, which
// contaminates the entire register.
MI.setDesc(get(ARM::VGETLNi32));
- AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
- .addReg(DReg, RegState::Undef)
- .addImm(Lane));
+ MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
// The old source should be an implicit use, otherwise we might think it
// was dead before here.
@@ -4398,8 +4489,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
MIB.addReg(DReg, RegState::Define)
.addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
.addReg(SrcReg)
- .addImm(Lane);
- AddDefaultPred(MIB);
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
// The narrower destination must be marked as set to keep previous chains
// in place.
@@ -4433,8 +4524,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
MI.setDesc(get(ARM::VDUPLN32d));
MIB.addReg(DDst, RegState::Define)
.addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
- .addImm(SrcLane);
- AddDefaultPred(MIB);
+ .addImm(SrcLane)
+ .add(predOps(ARMCC::AL));
// Neither the source or the destination are naturally represented any
// more, so add them in manually.
@@ -4470,10 +4561,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
CurUndef = !MI.readsRegister(CurReg, TRI);
- NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
-
- NewMIB.addImm(1);
- AddDefaultPred(NewMIB);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
if (SrcLane == DstLane)
NewMIB.addReg(SrcReg, RegState::Implicit);
@@ -4489,10 +4579,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
- MIB.addReg(CurReg, getUndefRegState(CurUndef));
-
- MIB.addImm(1);
- AddDefaultPred(MIB);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef))
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
if (SrcLane != DstLane)
MIB.addReg(SrcReg, RegState::Implicit);
@@ -4505,7 +4594,6 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
break;
}
}
-
}
//===----------------------------------------------------------------------===//
@@ -4613,9 +4701,9 @@ void ARMBaseInstrInfo::breakPartialRegDependency(
// Insert the dependency-breaking FCONSTD before MI.
// 96 is the encoding of 0.5, but the actual value doesn't matter here.
- AddDefaultPred(
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
- .addImm(96));
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
+ .addImm(96)
+ .add(predOps(ARMCC::AL));
MI.addRegisterKilled(DReg, TRI, true);
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b01d5c8ec85f..23777b821f9f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -17,16 +17,21 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CodeGen.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <array>
+#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "ARMGenInstrInfo.inc"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseRegisterInfo;
+
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
class ARMBaseInstrInfo : public ARMGenInstrInfo {
const ARMSubtarget &Subtarget;
@@ -106,7 +111,7 @@ public:
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is no such opcode.
- virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr &MI,
@@ -156,7 +161,7 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
/// GetInstSize - Returns the size of the specified MachineInstr.
///
@@ -401,25 +406,28 @@ public:
bool isSwiftFastImmShift(const MachineInstr *MI) const;
};
-static inline
-const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
- return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+/// Get the operands corresponding to the given \p Pred value. By default, the
+/// predicate register is assumed to be 0 (no register), but you can pass in a
+/// \p PredReg if that is not the case.
+static inline std::array<MachineOperand, 2> predOps(ARMCC::CondCodes Pred,
+ unsigned PredReg = 0) {
+ return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)),
+ MachineOperand::CreateReg(PredReg, false)}};
}
-static inline
-const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
+/// Get the operand corresponding to the conditional code result. By default,
+/// this is 0 (no register).
+static inline MachineOperand condCodeOp(unsigned CCReg = 0) {
+ return MachineOperand::CreateReg(CCReg, false);
}
-static inline
-const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
- bool isDead = false) {
- return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
-}
-
-static inline
-const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
+/// Get the operand corresponding to the conditional code result for Thumb1.
+/// This operand will always refer to CPSR and it will have the Define flag set.
+/// You can optionally set the Dead flag by means of \p isDead.
+static inline MachineOperand t1CondCodeOp(bool isDead = false) {
+ return MachineOperand::CreateReg(ARM::CPSR,
+ /*Define*/ true, /*Implicit*/ false,
+ /*Kill*/ false, isDead);
}
static inline
@@ -517,6 +525,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII);
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
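The helpers above are what the mechanical rewrites in the .cpp hunks compose onto MachineInstrBuilder. A minimal before/after sketch, with the opcode and operands chosen arbitrarily (predOps and condCodeOp are the functions defined above):

  // Old style: trailing predicate and cc_out operands appended by wrappers.
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::ADDri), DestReg)
                                  .addReg(SrcReg)
                                  .addImm(42)));

  // New style: they are ordinary MachineOperands added in source order.
  BuildMI(MBB, MI, DL, TII.get(ARM::ADDri), DestReg)
      .addReg(SrcReg)
      .addImm(42)
      .add(predOps(ARMCC::AL)) // predicate imm + predicate register (none)
      .add(condCodeOp());      // cc_out, no register unless the S-form is wanted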
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d995c631dd1c..70a44eaaceb8 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -11,32 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <utility>
#define DEBUG_TYPE "arm-register-info"
@@ -46,7 +56,7 @@
using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo()
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
@@ -140,7 +150,6 @@ ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) con
return CSR_FPRegs_RegMask;
}
-
const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -425,10 +434,11 @@ void ARMBaseRegisterInfo::emitLoadConstPool(
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx)
- .addImm(0).addImm(Pred).addReg(PredReg)
- .setMIFlags(MIFlags);
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(Pred, PredReg))
+ .setMIFlags(MIFlags);
}
bool ARMBaseRegisterInfo::
@@ -474,26 +484,23 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
Scale = 4;
break;
}
- case ARMII::AddrMode2: {
+ case ARMII::AddrMode2:
ImmIdx = Idx+2;
InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs = -InstrOffs;
break;
- }
- case ARMII::AddrMode3: {
+ case ARMII::AddrMode3:
ImmIdx = Idx+2;
InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs = -InstrOffs;
break;
- }
- case ARMII::AddrModeT1_s: {
+ case ARMII::AddrModeT1_s:
ImmIdx = Idx+1;
InstrOffs = MI->getOperand(ImmIdx).getImm();
Scale = 4;
break;
- }
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -609,7 +616,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(AddDefaultPred(MIB));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -636,7 +643,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
assert(AFI->isThumb2Function());
Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
}
- assert (Done && "Unable to resolve frame index!");
+ assert(Done && "Unable to resolve frame index!");
(void)Done;
}
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 330e1535e863..2e91d9d4be24 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -15,24 +15,33 @@
#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdint>
#define GET_REGINFO_HEADER
#include "ARMGenRegisterInfo.inc"
namespace llvm {
+
/// Register allocation hints.
namespace ARMRI {
+
enum {
RegPairOdd = 1,
RegPairEven = 2
};
-}
+
+} // end namespace ARMRI
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case R0: case R1: case R2: case R3:
case R4: case R5: case R6: case R7:
@@ -48,6 +57,7 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case R8: case R9: case R10: case R11: case R12:
// iOS has this second area.
@@ -59,6 +69,7 @@ static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case D15: case D14: case D13: case D12:
case D11: case D10: case D9: case D8:
@@ -87,7 +98,7 @@ protected:
/// BasePtr - ARM physical register used as a base ptr in complex stack
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
/// variable size stack objects.
- unsigned BasePtr;
+ unsigned BasePtr = ARM::R6;
// Can be only subclassed.
explicit ARMBaseRegisterInfo();
@@ -198,4 +209,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.h b/lib/Target/ARM/ARMBasicBlockInfo.h
index 780544f865df..e0cb0aa676a6 100644
--- a/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -14,9 +14,9 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
#define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
-#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
-using namespace llvm;
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cstdint>
namespace llvm {
@@ -44,31 +44,30 @@ struct BasicBlockInfo {
///
/// Because worst case padding is used, the computed offset of an aligned
/// block may not actually be aligned.
- unsigned Offset;
+ unsigned Offset = 0;
/// Size - Size of the basic block in bytes. If the block contains
/// inline assembly, this is a worst case estimate.
///
/// The size does not include any alignment padding whether from the
/// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
+ unsigned Size = 0;
/// KnownBits - The number of low bits in Offset that are known to be
/// exact. The remaining bits of Offset are an upper bound.
- uint8_t KnownBits;
+ uint8_t KnownBits = 0;
/// Unalign - When non-zero, the block contains instructions (inline asm)
/// of unknown size. The real size may be smaller than Size bytes by a
/// multiple of 1 << Unalign.
- uint8_t Unalign;
+ uint8_t Unalign = 0;
/// PostAlign - When non-zero, the block terminator contains a .align
/// directive, so the end of the block is aligned to 1 << PostAlign
/// bytes.
- uint8_t PostAlign;
+ uint8_t PostAlign = 0;
- BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
- PostAlign(0) {}
+ BasicBlockInfo() = default;
/// Compute the number of known offset bits internally to this block.
/// This number should be used to predict worst case padding when
@@ -107,4 +106,4 @@ struct BasicBlockInfo {
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
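Since the fields above now carry in-class initializers, the defaulted constructor yields exactly the zero-initialized state the removed member-initializer list produced. A reduced sketch of the pattern, with invented names:

  struct BlockInfoSketch {
    unsigned Offset = 0;   // replaces Offset(0) in the old init list
    uint8_t KnownBits = 0; // replaces KnownBits(0)

    BlockInfoSketch() = default; // same observable state as before
  };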
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 52c95b6244ac..94b317a8f986 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -17,7 +17,9 @@
#include "ARMBaseInstrInfo.h"
#include "ARMISelLowering.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,25 +32,47 @@ using namespace llvm;
ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
: CallLowering(&TLI) {}
-static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI,
+static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
- EVT VT = TLI.getValueType(DL, T);
- if (!VT.isSimple() || !VT.isInteger() || VT.isVector())
+ EVT VT = TLI.getValueType(DL, T, true);
+ if (!VT.isSimple() || VT.isVector())
return false;
unsigned VTSize = VT.getSimpleVT().getSizeInBits();
- return VTSize == 8 || VTSize == 16 || VTSize == 32;
+
+ if (VTSize == 64)
+ // FIXME: Support i64 too
+ return VT.isFloatingPoint();
+
+ return VTSize == 1 || VTSize == 8 || VTSize == 16 || VTSize == 32;
}
namespace {
-struct FuncReturnHandler : public CallLowering::ValueHandler {
- FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder &MIB)
- : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+/// Helper class for values going out through an ABI boundary (used for handling
+/// function return values and call parameters).
+struct OutgoingValueHandler : public CallLowering::ValueHandler {
+ OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
- llvm_unreachable("Don't know how to get a stack address yet");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ LLT p0 = LLT::pointer(0, 32);
+ LLT s32 = LLT::scalar(32);
+ unsigned SPReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(SPReg, ARM::SP);
+
+ unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+
+ unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+
+ MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ return AddrReg;
}
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
@@ -56,26 +80,92 @@ struct FuncReturnHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
- assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
-
- assert(VA.getLocInfo() != CCValAssign::SExt &&
- VA.getLocInfo() != CCValAssign::ZExt &&
- "ABI extensions not supported yet");
+ assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- MIRBuilder.buildCopy(PhysReg, ValVReg);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("Don't know how to assign a value to an address yet");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
+ /* Alignment */ 0);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
+ }
+
+ unsigned assignCustomValue(const CallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs) override {
+ CCValAssign VA = VAs[0];
+ assert(VA.needsCustom() && "Value doesn't need custom handling");
+ assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ CCValAssign NextVA = VAs[1];
+ assert(NextVA.needsCustom() && "Value doesn't need custom handling");
+ assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
+
+ assert(VA.getValNo() == NextVA.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VA.isRegLoc() && "Value should be in reg");
+ assert(NextVA.isRegLoc() && "Value should be in reg");
+
+ unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+ MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0);
+ MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32);
+
+ bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
+ if (!IsLittle)
+ std::swap(NewRegs[0], NewRegs[1]);
+
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+
+ return 1;
+ }
+
+ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info, CCState &State) override {
+ if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State))
+ return true;
+
+ StackSize =
+ std::max(StackSize, static_cast<uint64_t>(State.getNextStackOffset()));
+ return false;
}
MachineInstrBuilder &MIB;
+ uint64_t StackSize;
};
} // End anonymous namespace.
+void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ MachineRegisterInfo &MRI) const {
+ const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+ assert(SplitVTs.size() == 1 && "Unsupported type");
+
+ // Even if there is no splitting to do, we still want to replace the original
+ // type (e.g. pointer type -> integer).
+ SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
+ OrigArg.Flags, OrigArg.IsFixed);
+}
+
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
@@ -93,21 +183,23 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!isSupportedType(DL, TLI, Val->getType()))
return false;
+ SmallVector<ArgInfo, 4> SplitVTs;
+ ArgInfo RetInfo(VReg, Val->getType());
+ setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(RetInfo, SplitVTs, DL, MF.getRegInfo());
+
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
- ArgInfo RetInfo(VReg, Val->getType());
- setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F);
-
- FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
- return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler);
+ OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
+ return handleAssignments(MIRBuilder, SplitVTs, RetHandler);
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
assert(!Val == !VReg && "Return value without a vreg");
- auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET));
+ auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL));
if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
return false;
@@ -117,13 +209,17 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
}
namespace {
-struct FormalArgHandler : public CallLowering::ValueHandler {
- FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : ValueHandler(MIRBuilder, MRI) {}
+/// Helper class for values coming in through an ABI boundary (used for handling
+/// formal arguments and call return values).
+struct IncomingValueHandler : public CallLowering::ValueHandler {
+ IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
- assert(Size == 4 && "Unsupported size");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
auto &MFI = MIRBuilder.getMF().getFrameInfo();
@@ -139,7 +235,17 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- assert(Size == 4 && "Unsupported size");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ if (VA.getLocInfo() == CCValAssign::SExt ||
+ VA.getLocInfo() == CCValAssign::ZExt) {
+ // If the value is zero- or sign-extended, its size becomes 4 bytes, so
+ // that's what we should load.
+ Size = 4;
+ assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
+ MRI.setType(ValVReg, LLT::scalar(32));
+ }
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
@@ -151,12 +257,60 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
- assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+ assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- MIRBuilder.getMBB().addLiveIn(PhysReg);
+ // The necessary extensions are handled on the other side of the ABI
+ // boundary.
+ markPhysRegUsed(PhysReg);
MIRBuilder.buildCopy(ValVReg, PhysReg);
}
+
+ unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs) override {
+ CCValAssign VA = VAs[0];
+ assert(VA.needsCustom() && "Value doesn't need custom handling");
+ assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ CCValAssign NextVA = VAs[1];
+ assert(NextVA.needsCustom() && "Value doesn't need custom handling");
+ assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
+
+ assert(VA.getValNo() == NextVA.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VA.isRegLoc() && "Value should be in reg");
+ assert(NextVA.isRegLoc() && "Value should be in reg");
+
+ unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+
+ bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
+ if (!IsLittle)
+ std::swap(NewRegs[0], NewRegs[1]);
+
+ MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32});
+
+ return 1;
+ }
+
+ /// Marking a physical register as used is different between formal
+ /// parameters, where it's a basic block live-in, and call returns, where it's
+ /// an implicit-def of the call instruction.
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+};
+
+struct FormalArgHandler : public IncomingValueHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ }
};
} // End anonymous namespace
@@ -170,34 +324,111 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (F.isVarArg())
return false;
- auto DL = MIRBuilder.getMF().getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto DL = MF.getDataLayout();
auto &TLI = *getTLI<ARMTargetLowering>();
- auto &Args = F.getArgumentList();
- unsigned ArgIdx = 0;
- for (auto &Arg : Args) {
- ArgIdx++;
- if (!isSupportedType(DL, TLI, Arg.getType()))
- return false;
+ auto Subtarget = TLI.getSubtarget();
- // FIXME: This check as well as ArgIdx are going away as soon as we support
- // loading values < 32 bits.
- if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
+ if (Subtarget->isThumb())
+ return false;
+
+ for (auto &Arg : F.args())
+ if (!isSupportedType(DL, TLI, Arg.getType()))
return false;
- }
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
SmallVector<ArgInfo, 8> ArgInfos;
unsigned Idx = 0;
- for (auto &Arg : Args) {
+ for (auto &Arg : F.args()) {
ArgInfo AInfo(VRegs[Idx], Arg.getType());
setArgFlags(AInfo, Idx + 1, DL, F);
- ArgInfos.push_back(AInfo);
+ splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo());
Idx++;
}
- FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo());
- return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler);
+ FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
+ AssignFn);
+ return handleAssignments(MIRBuilder, ArgInfos, ArgHandler);
+}
+
+namespace {
+struct CallReturnHandler : public IncomingValueHandler {
+ CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIB.addDef(PhysReg, RegState::Implicit);
+ }
+
+ MachineInstrBuilder MIB;
+};
+} // End anonymous namespace.
+
+bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv,
+ const MachineOperand &Callee,
+ const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const auto &TLI = *getTLI<ARMTargetLowering>();
+ const auto &DL = MF.getDataLayout();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (MF.getSubtarget<ARMSubtarget>().genLongCalls())
+ return false;
+
+ auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN);
+
+ // Create the call instruction so we can add the implicit uses of arg
+ // registers, but don't insert it yet.
+ auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask(
+ TRI->getCallPreservedMask(MF, CallConv));
+
+ SmallVector<ArgInfo, 8> ArgInfos;
+ for (auto Arg : OrigArgs) {
+ if (!isSupportedType(DL, TLI, Arg.Ty))
+ return false;
+
+ if (!Arg.IsFixed)
+ return false;
+
+ splitToValueTypes(Arg, ArgInfos, DL, MRI);
+ }
+
+ auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
+ if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+ return false;
+
+ // Now we can add the actual call instruction to the correct basic block.
+ MIRBuilder.insertInstr(MIB);
+
+ if (!OrigRet.Ty->isVoidTy()) {
+ if (!isSupportedType(DL, TLI, OrigRet.Ty))
+ return false;
+
+ ArgInfos.clear();
+ splitToValueTypes(OrigRet, ArgInfos, DL, MRI);
+
+ auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false);
+ CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
+ if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
+ return false;
+ }
+
+ // We now know the size of the stack - update the ADJCALLSTACKDOWN
+ // accordingly.
+ CallSeqStart.addImm(ArgHandler.StackSize).add(predOps(ARMCC::AL));
+
+ MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP)
+ .addImm(ArgHandler.StackSize)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+
+ return true;
}
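The two assignCustomValue overrides added in this file deal with f64 values that the soft-float AAPCS places in a pair of 32-bit GPRs. Roughly, for a little-endian target an outgoing f64 return value is decomposed as sketched below; register numbers and the MIR spelling are approximate, not taken from the patch:

  // %val: the f64 (s64) virtual register being returned
  //   %lo(s32) = G_EXTRACT %val(s64), 0    ; low half
  //   %hi(s32) = G_EXTRACT %val(s64), 32   ; high half
  //   %r0 = COPY %lo                        ; GPR pair chosen by the CC
  //   %r1 = COPY %hi                        ; halves swapped on big-endian
  //   BX_RET 14, _, implicit %r0, implicit %r1
  // The incoming handler does the inverse: it copies out of the two physical
  // registers and rebuilds the s64 with buildSequence at offsets {0, 32}.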
diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h
index 6a1b886b501f..6404c7a2689e 100644
--- a/lib/Target/ARM/ARMCallLowering.h
+++ b/lib/Target/ARM/ARMCallLowering.h
@@ -34,9 +34,19 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const override;
+
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
unsigned VReg, MachineInstrBuilder &Ret) const;
+
+ /// Split an argument into one or more arguments that the CC lowering can cope
+ /// with (e.g. replace pointers with integers).
+ void splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI) const;
};
} // End of namespace llvm
#endif
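The lowerCall hook declared above (implemented in the preceding .cpp hunk) brackets the call in a stack-adjustment pair and lets the two value handlers place arguments and results. For a simple int(int) call the emitted shape is roughly the following sketch; operand spelling is schematic and the names are invented:

  //   ADJCALLSTACKDOWN <stack-bytes>, 14, %noreg
  //   %r0 = COPY %arg                 ; OutgoingValueHandler, implicit use on BLX
  //   BLX @callee, <call-preserved regmask>, implicit %r0, implicit-def %r0
  //   %ret = COPY %r0                 ; CallReturnHandler marks the implicit def
  //   ADJCALLSTACKUP <stack-bytes>, 0, 14, %noreg
  // The stack size is only known after the argument handler has run, which is
  // why ADJCALLSTACKDOWN is created first and gets its immediate appended later.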
diff --git a/lib/Target/ARM/ARMComputeBlockSize.cpp b/lib/Target/ARM/ARMComputeBlockSize.cpp
index 64f187d17e64..e145d0a49ae6 100644
--- a/lib/Target/ARM/ARMComputeBlockSize.cpp
+++ b/lib/Target/ARM/ARMComputeBlockSize.cpp
@@ -8,7 +8,15 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <vector>
+
using namespace llvm;
namespace llvm {
@@ -69,4 +77,4 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) {
return BBInfo;
}
-} // end namespace
+} // end namespace llvm
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index be1a37e3e362..23722f1b7f3f 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -14,30 +14,50 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "arm-cp-islands"
+#define ARM_CP_ISLANDS_OPT_NAME \
+ "ARM constant island placement and branch shortening pass"
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
@@ -49,7 +69,6 @@ STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
-
static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
@@ -64,6 +83,7 @@ static cl::opt<bool> SynthesizeThumb1TBB(
"equivalent to the TBB/TBH instructions"));
namespace {
+
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
/// inside a function. To do this, it completely ignores the normal LLVM
@@ -76,7 +96,6 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
-
std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
@@ -110,12 +129,14 @@ namespace {
bool NegOk;
bool IsSoImm;
bool KnownAlignment;
+
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
: MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
KnownAlignment(false) {
HighWaterMark = CPEMI->getParent();
}
+
/// getMaxDisp - Returns the maximum displacement supported by MI.
/// Correct for unknown alignment.
/// Conservatively subtract 2 bytes to handle weird alignment effects.
@@ -135,6 +156,7 @@ namespace {
MachineInstr *CPEMI;
unsigned CPI;
unsigned RefCount;
+
CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
: CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
};
@@ -148,7 +170,7 @@ namespace {
/// The first half of CPEntries contains generic constants, the second half
/// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
/// which vector it will be in here.
- std::vector<std::vector<CPEntry> > CPEntries;
+ std::vector<std::vector<CPEntry>> CPEntries;
/// Maps a JT index to the offset in CPEntries containing copies of that
/// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
@@ -167,6 +189,7 @@ namespace {
unsigned MaxDisp : 31;
bool isCond : 1;
unsigned UncondBr;
+
ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
@@ -195,8 +218,10 @@ namespace {
bool isThumb1;
bool isThumb2;
bool isPositionIndependentOrROPI;
+
public:
static char ID;
+
ARMConstantIslands() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -207,7 +232,7 @@ namespace {
}
StringRef getPassName() const override {
- return "ARM constant island placement and branch shortening pass";
+ return ARM_CP_ISLANDS_OPT_NAME;
}
private:
@@ -264,8 +289,10 @@ namespace {
U.getMaxDisp(), U.NegOk, U.IsSoImm);
}
};
+
char ARMConstantIslands::ID = 0;
-}
+
+} // end anonymous namespace
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
@@ -295,8 +322,9 @@ void ARMConstantIslands::verify() {
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void ARMConstantIslands::dumpBBs() {
+LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
DEBUG({
for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
const BasicBlockInfo &BBI = BBInfo[J];
@@ -308,12 +336,7 @@ void ARMConstantIslands::dumpBBs() {
}
});
}
-
-/// createARMConstantIslandPass - returns an instance of the constpool
-/// island pass.
-FunctionPass *llvm::createARMConstantIslandPass() {
- return new ARMConstantIslands();
-}
+#endif
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
@@ -782,6 +805,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
case ARM::LDRcp:
case ARM::t2LDRpci:
case ARM::t2LDRHpci:
+ case ARM::t2LDRBpci:
Bits = 12; // +-offset_12
NegOk = true;
break;
@@ -873,7 +897,6 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
WaterList.insert(IP, NewBB);
}
-
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
@@ -897,8 +920,9 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
if (!isThumb)
BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
else
- BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc))
+ .addMBB(NewBB)
+ .add(predOps(ARMCC::AL));
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
@@ -1296,8 +1320,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (!isThumb)
BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr))
+ .addMBB(NewMBB)
+ .add(predOps(ARMCC::AL));
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
@@ -1477,7 +1502,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
// add it to the island.
U.HighWaterMark = NewIsland;
U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
- .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
+ .addImm(ID)
+ .add(CPEMI->getOperand(1))
+ .addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
@@ -1681,8 +1708,9 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
Br.MI = &MBB->back();
BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
if (isThumb)
- BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
+ .addMBB(DestBB)
+ .add(predOps(ARMCC::AL));
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
@@ -1709,8 +1737,15 @@ bool ARMConstantIslands::undoLRSpillRestore() {
MI->getNumExplicitOperands() == 3) {
// Create the new insn and copy the predicate from the old.
BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ if (MI->getOpcode() == ARM::tPUSH &&
+ MI->getOperand(2).getReg() == ARM::LR &&
+ MI->getNumExplicitOperands() == 3) {
+ // Just remove the push.
MI->eraseFromParent();
MadeChange = true;
}
@@ -1792,13 +1827,12 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
Bits = 11;
Scale = 2;
break;
- case ARM::t2Bcc: {
+ case ARM::t2Bcc:
NewOpc = ARM::tBcc;
Bits = 8;
Scale = 2;
break;
}
- }
if (NewOpc) {
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
@@ -1983,6 +2017,54 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
&*MBB->begin() == CPEMI;
}
+static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
+ MachineInstr *JumpMI,
+ unsigned &DeadSize) {
+ // Remove a dead add between the LEA and JT, which used to compute EntryReg,
+ // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
+ // and is not clobbered / used.
+ MachineInstr *RemovableAdd = nullptr;
+ unsigned EntryReg = JumpMI->getOperand(0).getReg();
+
+ // Find the last ADD to set EntryReg
+ MachineBasicBlock::iterator I(LEAMI);
+ for (++I; &*I != JumpMI; ++I) {
+ if (I->getOpcode() == ARM::t2ADDrs && I->getOperand(0).getReg() == EntryReg)
+ RemovableAdd = &*I;
+ }
+
+ if (!RemovableAdd)
+ return;
+
+ // Ensure EntryReg is not clobbered or used.
+ MachineBasicBlock::iterator J(RemovableAdd);
+ for (++J; &*J != JumpMI; ++J) {
+ for (unsigned K = 0, E = J->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = J->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == EntryReg)
+ return;
+ if (MO.isUse() && MO.getReg() == EntryReg)
+ return;
+ }
+ }
+
+ DEBUG(dbgs() << "Removing Dead Add: " << *RemovableAdd);
+ RemovableAdd->eraseFromParent();
+ DeadSize += 4;
+}
+
+static bool registerDefinedBetween(unsigned Reg,
+ MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To,
+ const TargetRegisterInfo *TRI) {
+ for (auto I = From; I != To; ++I)
+ if (I->modifiesRegister(Reg, TRI))
+ return true;
+ return false;
+}
+
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::optimizeThumb2JumpTables() {
@@ -2060,6 +2142,12 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
IdxReg = Shift->getOperand(2).getReg();
unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
+ // It's important that IdxReg is live until the actual TBB/TBH. Most of
+ // the range is checked later, but the LEA might still clobber it and not
+ // actually get removed.
+ if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI))
+ continue;
+
MachineInstr *Load = User.MI->getNextNode();
if (Load->getOpcode() != ARM::tLDRr)
continue;
@@ -2069,6 +2157,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
continue;
// If we're in PIC mode, there should be another ADD following.
+ auto *TRI = STI->getRegisterInfo();
if (isPositionIndependentOrROPI) {
MachineInstr *Add = Load->getNextNode();
if (Add->getOpcode() != ARM::tADDrr ||
@@ -2078,22 +2167,26 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
continue;
if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
continue;
-
+ if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI))
+ // IdxReg gets redefined in the middle of the sequence.
+ continue;
Add->eraseFromParent();
DeadSize += 2;
} else {
if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
continue;
+ if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI))
+ // IdxReg gets redefined in the middle of the sequence.
+ continue;
}
-
-
+
// Now safe to delete the load and lsl. The LEA will be removed later.
CanDeleteLEA = true;
Shift->eraseFromParent();
Load->eraseFromParent();
DeadSize += 4;
}
-
+
DEBUG(dbgs() << "Shrink JT: " << *MI);
MachineInstr *CPEMI = User.CPEMI;
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
@@ -2117,7 +2210,10 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
NewJTMI->getOperand(0).setReg(ARM::PC);
NewJTMI->getOperand(0).setIsKill(false);
- if (CanDeleteLEA) {
+ if (CanDeleteLEA) {
+ if (isThumb2)
+ RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize);
+
User.MI->eraseFromParent();
DeadSize += isThumb2 ? 4 : 2;
@@ -2238,13 +2334,11 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
if (isThumb2)
BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
.addMBB(BB)
- .addImm(ARMCC::AL)
- .addReg(0);
+ .add(predOps(ARMCC::AL));
else
BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
.addMBB(BB)
- .addImm(ARMCC::AL)
- .addReg(0);
+ .add(predOps(ARMCC::AL));
// Update internal data structures to account for the newly inserted MBB.
MF->RenumberBlocks(NewBB);
@@ -2256,3 +2350,12 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
++NumJTInserted;
return NewBB;
}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME,
+ false, false)
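
The ARMConstantIslands hunks above repeatedly swap the hand-built predicate pair ".addImm(ARMCC::AL).addReg(0)" for a single ".add(predOps(ARMCC::AL))". The helper itself is not part of this excerpt (it presumably lives in ARMBaseInstrInfo.h, which this patch starts including elsewhere), so the following is only a sketch of the shape implied by the hunks: predOps packages the condition-code immediate and the optional predicate register into operands that MachineInstrBuilder::add can append in one call.

  // Sketch, not the actual ARMBaseInstrInfo.h definition: builds the two
  // operands an ARM predicate consists of (condition-code immediate plus
  // optional predicate register), matching the ".addImm(Pred).addReg(PredReg)"
  // pairs removed in the hunks above.
  #include "MCTargetDesc/ARMBaseInfo.h"      // ARMCC::CondCodes
  #include "llvm/CodeGen/MachineOperand.h"
  #include <array>

  static inline std::array<llvm::MachineOperand, 2>
  predOps(llvm::ARMCC::CondCodes Pred, unsigned PredReg = 0) {
    return {{llvm::MachineOperand::CreateImm(static_cast<int64_t>(Pred)),
             llvm::MachineOperand::CreateReg(PredReg, /*isDef=*/false)}};
  }

  // Usage, as in the splitBlockBeforeInstr hunk:
  //   BuildMI(OrigBB, DebugLoc(), TII->get(Opc))
  //       .addMBB(NewBB)
  //       .add(predOps(ARMCC::AL));   // was .addImm(ARMCC::AL).addReg(0)
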
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 2d1602873ce0..9705c8b718b7 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,13 +13,17 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -44,7 +48,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
AddCurrentAddress(addCurrentAddress) {}
-ARMConstantPoolValue::~ARMConstantPoolValue() {}
+ARMConstantPoolValue::~ARMConstantPoolValue() = default;
StringRef ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
@@ -94,9 +98,11 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
return false;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
+#endif
void ARMConstantPoolValue::print(raw_ostream &O) const {
if (Modifier) O << "(" << getModifierText() << ")";
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 5f61832aa740..61c521581f79 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -14,10 +14,11 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstddef>
+#include <string>
+#include <vector>
namespace llvm {
@@ -29,6 +30,7 @@ class LLVMContext;
class MachineBasicBlock;
namespace ARMCP {
+
enum ARMCPKind {
CPValue,
CPExtSymbol,
@@ -47,7 +49,8 @@ namespace ARMCP {
SECREL, /// Section Relative (Windows TLS)
SBREL, /// Static Base Relative (RWPI)
};
-}
+
+} // end namespace ARMCP
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
/// represent PC-relative displacement between the address of the load
@@ -169,9 +172,11 @@ public:
const GlobalValue *getGV() const;
const BlockAddress *getBlockAddress() const;
+
const GlobalVariable *getPromotedGlobal() const {
return dyn_cast_or_null<GlobalVariable>(GVar);
}
+
const Constant *getPromotedGlobalInit() const {
return CVal;
}
@@ -186,6 +191,7 @@ public:
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
void print(raw_ostream &O) const override;
+
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() ||
APV->isPromotedGlobal();
@@ -267,6 +273,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index baa4e0330cf4..e0aecff2633b 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -19,6 +19,7 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -30,6 +31,7 @@
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "arm-pseudo"
@@ -97,9 +99,9 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
- UseMI.addOperand(MO);
+ UseMI.add(MO);
else
- DefMI.addOperand(MO);
+ DefMI.add(MO);
}
}
@@ -415,14 +417,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// For an instruction writing double-spaced subregs, the pseudo instruction
// has an extra operand that is a use of the super-register. Record the
@@ -432,15 +434,15 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
SrcOpIdx = OpIdx++;
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the super-register source operand used for double-spaced subregs over
// to the new instruction as an implicit operand.
if (SrcOpIdx != 0) {
MachineOperand MO = MI.getOperand(SrcOpIdx);
MO.setImplicit(true);
- MIB.addOperand(MO);
+ MIB.add(MO);
}
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
@@ -467,14 +469,14 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
@@ -490,8 +492,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
@@ -549,14 +551,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
}
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Grab the super-register source.
MachineOperand MO = MI.getOperand(OpIdx++);
@@ -579,12 +581,12 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
OpIdx += 1;
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the super-register source to be an implicit source.
MO.setImplicit(true);
- MIB.addOperand(MO);
+ MIB.add(MO);
if (TableEntry->IsLoad)
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
@@ -605,9 +607,9 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned OpIdx = 0;
// Transfer the destination register operand.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
if (IsExt)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
@@ -616,11 +618,11 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MIB.addReg(D0);
// Copy the other source register operand.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add an implicit kill and use for the super-reg.
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
@@ -659,6 +661,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
return false;
case MachineOperand::MO_IntrinsicID:
case MachineOperand::MO_Predicate:
+ case MachineOperand::MO_Placeholder:
llvm_unreachable("should not exist post-isel");
}
llvm_unreachable("unhandled machine operand type");
@@ -696,8 +699,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16 = HI16.addImm(SOImmValV2);
LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- LO16.addImm(Pred).addReg(PredReg).addReg(0);
- HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
+ HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
return;
@@ -797,7 +800,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
.addReg(Desired.getReg(), RegState::Kill);
if (!IsThumb)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
// .Lloadcmp:
@@ -814,12 +817,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MIB.addReg(Addr.getReg());
if (LdrexOp == ARM::t2LDREX)
MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
- AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
- .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .addOperand(Desired));
+ BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
+ .add(Desired)
+ .add(predOps(ARMCC::AL));
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
BuildMI(LoadCmpBB, DL, TII->get(Bcc))
.addMBB(DoneBB)
@@ -838,16 +842,17 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
- MIB.addOperand(New);
- MIB.addOperand(Addr);
+ MIB.add(New);
+ MIB.add(Addr);
if (StrexOp == ARM::t2STREX)
MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
- AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
- .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(StatusReg, RegState::Kill)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
.addMBB(LoadCmpBB)
.addImm(ARMCC::NE)
@@ -927,13 +932,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
- MIB.addReg(Addr.getReg());
- AddDefaultPred(MIB);
+ MIB.addReg(Addr.getReg()).add(predOps(ARMCC::AL));
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
- AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
- .addReg(DestLo, getKillRegState(Dest.isDead()))
- .addReg(DesiredLo, getKillRegState(Desired.isDead())));
+ BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(DestLo, getKillRegState(Dest.isDead()))
+ .addReg(DesiredLo, getKillRegState(Desired.isDead()))
+ .add(predOps(ARMCC::AL));
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
.addReg(DestHi, getKillRegState(Dest.isDead()))
@@ -959,13 +964,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
- MIB.addOperand(Addr);
- AddDefaultPred(MIB);
+ MIB.add(Addr).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
- AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
- .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(StatusReg, RegState::Kill)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
.addMBB(LoadCmpBB)
.addImm(ARMCC::NE)
@@ -1026,7 +1031,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add the default predicate in Thumb mode.
if (STI->isThumb())
- MIB.addImm(ARMCC::AL).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl,
TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
@@ -1047,9 +1052,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4));
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4));
MI.eraseFromParent();
return true;
@@ -1059,10 +1064,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1070,11 +1075,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsi: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
(MI.getOperand(1).getReg()))
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm())
- .addImm(MI.getOperand(4).getImm()) // 'pred'
- .addOperand(MI.getOperand(5))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .add(MI.getOperand(5))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1082,12 +1087,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsr: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3))
- .addImm(MI.getOperand(4).getImm())
- .addImm(MI.getOperand(5).getImm()) // 'pred'
- .addOperand(MI.getOperand(6))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .add(MI.getOperand(6))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1097,9 +1102,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4));
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
@@ -1108,10 +1113,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1121,10 +1126,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1143,11 +1148,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm())
- .addImm(MI.getOperand(4).getImm()) // 'pred'
- .addOperand(MI.getOperand(5))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .add(MI.getOperand(5))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
}
@@ -1187,10 +1192,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
"bits set.");
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(bicOpc), ARM::R6)
- .addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign-1)));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign - 1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
}
@@ -1201,24 +1207,25 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
// These are just fancy MOVs instructions.
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
- MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
- ARM_AM::lsr : ARM_AM::asr),
- 1)))
- .addReg(ARM::CPSR, RegState::Define);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(
+ (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
return true;
}
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
- MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
- .addReg(0);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
@@ -1241,18 +1248,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
if (!Thumb)
MIB.addImm(0);
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
if (Thumb)
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB.addReg(Reg, RegState::Kill);
} else {
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Thumb ? ARM::tBL : ARM::BL));
if (Thumb)
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB.addExternalSymbol("__aeabi_read_tp", 0);
}
@@ -1268,15 +1275,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(NewLdOpc), DstReg)
- .addOperand(MI.getOperand(1)));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
+ .add(MI.getOperand(1))
+ .add(predOps(ARMCC::AL));
MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::tPICADD))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addOperand(MI.getOperand(2));
+ MachineInstrBuilder MIB2 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .add(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
@@ -1319,7 +1326,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
if (IsARM)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (IsPIC) {
MachineInstrBuilder MIB =
@@ -1329,7 +1336,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addImm(ARMPCLabelIndex);
if (IsARM)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
MI.eraseFromParent();
@@ -1368,7 +1375,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg).addImm(LabelId);
if (isARM) {
- AddDefaultPred(MIB3);
+ MIB3.add(predOps(ARMCC::AL));
if (Opcode == ARM::MOV_ga_pcrel_ldr)
MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
@@ -1388,9 +1395,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
.addReg(ARM::LR)
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addReg(ARM::CPSR, RegState::Undef);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
@@ -1407,11 +1414,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
// Copy the source register.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add the destination operands (D subregs).
unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
@@ -1438,11 +1445,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
// Copy the destination register.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add the source operands (D subregs).
unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
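
Two related conversions recur through the ARMExpandPseudoInsts hunks: MIB.addOperand(MO) becomes MIB.add(MO), and the trailing .addReg(0) that stood for the optional 's'-bit (CPSR) operand becomes .add(condCodeOp()). A sketch of the latter helper, under the same caveat as above (the real definition is outside this excerpt):

  // Sketch only: the optional condition-code-result operand ("s" bit).
  // Register 0 means the instruction does not update CPSR; a caller that
  // does want flag updates would pass ARM::CPSR instead.
  #include "llvm/CodeGen/MachineOperand.h"

  static inline llvm::MachineOperand condCodeOp(unsigned CCReg = 0) {
    return llvm::MachineOperand::CreateReg(CCReg, /*isDef=*/false);
  }

  // Usage, as in the MOVCCr expansion hunk:
  //   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), DstReg)
  //       .add(MI.getOperand(2))
  //       .addImm(MI.getOperand(3).getImm())  // 'pred'
  //       .add(MI.getOperand(4))
  //       .add(condCodeOp());                 // was .addReg(0); 's' bit
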
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index df4dcb375750..01e062bd185c 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
@@ -21,30 +22,61 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -54,24 +86,22 @@ namespace {
enum {
RegBase,
FrameIndexBase
- } BaseType;
+ } BaseType = RegBase;
union {
unsigned Reg;
int FI;
} Base;
- int Offset;
+ int Offset = 0;
// Innocuous defaults for our address.
- Address()
- : BaseType(RegBase), Offset(0) {
- Base.Reg = 0;
- }
+ Address() {
+ Base.Reg = 0;
+ }
} Address;
class ARMFastISel final : public FastISel {
-
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
@@ -99,8 +129,9 @@ class ARMFastISel final : public FastISel {
Context = &funcInfo.Fn->getContext();
}
- // Code from FastISel.cpp.
private:
+ // Code from FastISel.cpp.
+
unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
@@ -117,18 +148,18 @@ class ARMFastISel final : public FastISel {
uint64_t Imm);
// Backend specific FastISel code.
- private:
+
bool fastSelectInstruction(const Instruction *I) override;
unsigned fastMaterializeConstant(const Constant *C) override;
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
bool fastLowerArguments() override;
- private:
+
#include "ARMGenFastISel.inc"
// Instruction selection routines.
- private:
+
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
@@ -151,12 +182,12 @@ class ARMFastISel final : public FastISel {
bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
// Utility routines.
- private:
+
bool isPositionIndependent() const;
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt);
+ bool isZExt, bool isEquality);
bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
@@ -179,7 +210,7 @@ class ARMFastISel final : public FastISel {
const TargetLowering *getTargetLowering() { return &TLI; }
// Call handling routines.
- private:
+
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
bool Return,
bool isVarArg);
@@ -198,7 +229,7 @@ class ARMFastISel final : public FastISel {
bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
// OptionalDef handling routines.
- private:
+
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
@@ -256,17 +287,13 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Are we NEON in ARM mode and have a predicate operand? If so, I know
// we're not predicable but add it anyways.
if (isARMNEONPred(MI))
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
// defines CPSR. All other OptionalDefines in ARM are the CCR register.
bool CPSR = false;
- if (DefinesOptionalPredicate(MI, &CPSR)) {
- if (CPSR)
- AddDefaultT1CC(MIB);
- else
- AddDefaultCC(MIB);
- }
+ if (DefinesOptionalPredicate(MI, &CPSR))
+ MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
return MIB;
}
@@ -434,7 +461,6 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
}
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
-
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return 0;
@@ -739,7 +765,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -971,7 +997,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// Create the base instruction, then add the operands.
if (allocReg)
ResultReg = createResultReg(RC);
- assert (ResultReg > 255 && "Expected an allocated virtual register.");
+ assert(ResultReg > 255 && "Expected an allocated virtual register.");
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
@@ -1216,7 +1242,6 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// behavior.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
-
// Get the compare predicate.
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
@@ -1231,7 +1256,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
@@ -1318,14 +1344,16 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt) {
+ bool isZExt, bool isEquality) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
- if (isFloat && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ return false;
+
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
return false;
// Check to see if the 2nd operand is a constant that we can encode directly
@@ -1364,10 +1392,18 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
// TODO: Verify compares.
case MVT::f32:
isICmp = false;
- CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+ // Equality comparisons shouldn't raise Invalid on uordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
+ else
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
isICmp = false;
+ // Equality comparisons shouldn't raise Invalid on uordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
+ else
CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
case MVT::i1:
@@ -1444,7 +1480,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
// Now set a register based on the comparison. Explicitly set the predicates
@@ -1466,7 +1503,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
bool ARMFastISel::SelectFPExt(const Instruction *I) {
// Make sure we have VFP and that we're extending float to double.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() ||
@@ -1485,7 +1522,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
// Make sure we have VFP and that we're truncating double to float.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
Value *V = I->getOperand(0);
if (!(I->getType()->isFloatTy() &&
@@ -1536,7 +1573,8 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
unsigned Opc;
if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
- else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
+ else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
+ Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
@@ -1561,7 +1599,8 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
- else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
+ else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
+ Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
// f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
@@ -1596,7 +1635,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
bool UseImm = false;
bool isNegativeImm = false;
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
- assert (VT == MVT::i32 && "Expecting an i32.");
+ assert(VT == MVT::i32 && "Expecting an i32.");
Imm = (int)ConstInt->getValue().getZExtValue();
if (Imm < 0) {
isNegativeImm = true;
@@ -1765,8 +1804,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
Type *Ty = I->getType();
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
- if (isFloat && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ return false;
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
return false;
unsigned Opc;
@@ -1926,7 +1966,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
- assert (Arg != 0 && "Failed to emit a sext");
+ assert(Arg != 0 && "Failed to emit a sext");
ArgVT = DestVT;
break;
}
@@ -1935,7 +1975,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
- assert (Arg != 0 && "Failed to emit a zext");
+ assert(Arg != 0 && "Failed to emit a zext");
ArgVT = DestVT;
break;
}
@@ -2230,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
DbgLoc, TII.get(CallOpc));
// BL / BLX don't take a predicate, but tBL / tBLX do.
if (isThumb2)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (Subtarget->genLongCalls())
MIB.addReg(CalleeReg);
else
@@ -2311,19 +2351,19 @@ bool ARMFastISel::SelectCall(const Instruction *I,
break;
ISD::ArgFlagsTy Flags;
- unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ unsigned ArgIdx = i - CS.arg_begin();
+ if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) ||
- CS.paramHasAttr(AttrInd, Attribute::SwiftError) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
+ CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
+ CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
+ CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
+ CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
+ CS.paramHasAttr(ArgIdx, Attribute::ByVal))
return false;
Type *ArgTy = (*i)->getType();
@@ -2373,7 +2413,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// ARM calls don't take a predicate, but tBL / tBLX do.
if(isThumb2)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (UseReg)
MIB.addReg(CalleeReg);
else if (!IntrMemName)
@@ -2418,7 +2458,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
else if (Len >= 2)
VT = MVT::i16;
else {
- assert (Len == 1 && "Expected a length of 1!");
+ assert(Len == 1 && "Expected a length of 1!");
VT = MVT::i8;
}
} else {
@@ -2433,9 +2473,9 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
bool RV;
unsigned ResultReg;
RV = ARMEmitLoad(VT, ResultReg, Src);
- assert (RV == true && "Should be able to handle this load.");
+ assert(RV && "Should be able to handle this load.");
RV = ARMEmitStore(VT, ResultReg, Dest);
- assert (RV == true && "Should be able to handle this store.");
+ assert(RV && "Should be able to handle this store.");
(void)RV;
unsigned Size = VT.getSizeInBits()/8;
@@ -2687,9 +2727,11 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
if (setsCPSR)
MIB.addReg(ARM::CPSR, RegState::Define);
SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
- AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
+ MIB.addReg(SrcReg, isKill * RegState::Kill)
+ .addImm(ImmEnc)
+ .add(predOps(ARMCC::AL));
if (hasS)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
// Second instruction consumes the first's result.
SrcReg = ResultReg;
}
@@ -2779,7 +2821,6 @@ bool ARMFastISel::SelectShift(const Instruction *I,
// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
-
switch (I->getOpcode()) {
case Instruction::Load:
return SelectLoad(I);
@@ -2849,6 +2890,7 @@ bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
}
namespace {
+
// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
@@ -2865,7 +2907,8 @@ const struct FoldableLoadExtendsStruct {
{ { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
{ { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
};
-}
+
+} // end anonymous namespace
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
@@ -2933,7 +2976,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
.addConstantPoolIndex(Idx);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Fix the address by adding pc.
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -2944,7 +2987,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
if (!Subtarget->isThumb())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (UseGOT_PREL && Subtarget->isThumb()) {
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -3010,7 +3053,6 @@ bool ARMFastISel::fastLowerArguments() {
}
}
-
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -3035,6 +3077,7 @@ bool ARMFastISel::fastLowerArguments() {
}
namespace llvm {
+
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
@@ -3042,4 +3085,5 @@ namespace llvm {
return nullptr;
}
-}
+
+} // end namespace llvm
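
Besides the mechanical builder-API updates, the ARMFastISel hunks above carry a behavioural change: ARMEmitCmp now takes an isEquality flag and selects the quiet VFP compares for equality predicates, so NaN operands no longer raise the Invalid exception, and several FP paths now bail out on f64 when the FPU is single-precision-only (isFPOnlySP). A condensed sketch of the f32 opcode choice, using only the opcode names visible in the diff (the opcode enum itself comes from the ARM target's generated instruction definitions):

  // Sketch of the compare-opcode selection added in ARMEmitCmp above.
  // VCMPS/VCMPZS are quiet compares; VCMPES/VCMPEZS additionally signal
  // the FP Invalid exception on unordered (NaN) inputs, which equality
  // tests should avoid. UseImm means "compare against +0.0".
  static unsigned pickF32CmpOpcode(bool UseImm, bool IsEquality) {
    if (IsEquality)
      return UseImm ? ARM::VCMPZS : ARM::VCMPS;    // ==, != : quiet
    return UseImm ? ARM::VCMPEZS : ARM::VCMPES;    // <, <=, ... : signalling
  }
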
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index 0c910ab6130f..8c0df4c2cbf9 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -19,10 +19,10 @@
namespace llvm {
template<typename InstrType> // could be MachineInstr or MCInst
-bool IsCPSRDead(InstrType *Instr);
+bool IsCPSRDead(const InstrType *Instr);
template<typename InstrType> // could be MachineInstr or MCInst
-inline bool isV8EligibleForIT(InstrType *Instr) {
+inline bool isV8EligibleForIT(const InstrType *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index c72db8aca108..37be22bed540 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -16,19 +16,49 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "arm-frame-lowering"
@@ -180,6 +210,7 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF,
}
namespace {
+
struct StackAdjustingInsts {
struct InstInfo {
MachineBasicBlock::iterator I;
@@ -196,7 +227,8 @@ struct StackAdjustingInsts {
}
void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
- auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
+ auto Info =
+ llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
assert(Info != Insts.end() && "invalid sp adjusting instruction");
Info->SPAdjust += ExtraBytes;
}
@@ -219,7 +251,8 @@ struct StackAdjustingInsts {
}
}
};
-}
+
+} // end anonymous namespace
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
@@ -252,35 +285,40 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
// lsr Reg, Reg, log2(Alignment)
// lsl Reg, Reg, log2(Alignment)
if (CanUseBFC) {
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(~AlignMask));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask)
+ .add(predOps(ARMCC::AL));
} else if (AlignMask <= 255) {
- AddDefaultCC(
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(AlignMask)));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(AlignMask)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
} else {
assert(!MustBeSingleInstruction &&
"Shouldn't call emitAligningInstructions demanding a single "
"instruction to be emitted for large stack alignment for a target "
"without BFC.");
- AddDefaultCC(AddDefaultPred(
- BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
- AddDefaultCC(AddDefaultPred(
- BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
} else {
// Since this is only reached for Thumb-2 targets, the BFC instruction
// should always be available.
assert(CanUseBFC);
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(~AlignMask));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask)
+ .add(predOps(ARMCC::AL));
}
}
@@ -448,9 +486,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
uint32_t NumWords = NumBytes >> 2;
if (NumWords < 65536)
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
else
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
.addImm(NumWords)
@@ -462,10 +501,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__chkstk")
- .addReg(ARM::R4, RegState::Implicit)
- .setMIFlags(MachineInstr::FrameSetup);
+ .add(predOps(ARMCC::AL))
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
case CodeModel::JITDefault:
@@ -474,18 +513,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(ARM::R12, RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit)
- .setMIFlags(MachineInstr::FrameSetup);
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::R12, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
}
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
- ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
NumBytes = 0;
}
@@ -657,12 +697,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// -- out lower bits in r4
// mov sp, r4
// FIXME: It will be better just to find spare register here.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
- .addReg(ARM::SP, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill)
+ .add(predOps(ARMCC::AL));
emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
false);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
- .addReg(ARM::R4, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
AFI->setShouldRestoreSPFromFP(true);
@@ -675,14 +717,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: Clarify FrameSetup flags here.
if (RegInfo->hasBasePointer(MF)) {
if (isARM)
- BuildMI(MBB, MBBI, dl,
- TII.get(ARM::MOVr), RegInfo->getBaseRegister())
- .addReg(ARM::SP)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- RegInfo->getBaseRegister())
- .addReg(ARM::SP));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
}
// If the frame has variable sized objects then the epilogue must restore
@@ -757,19 +799,21 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(ARM::R4));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
}
} else {
// Thumb2 or ARM.
if (isARM)
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(FramePtr));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL));
}
} else if (NumBytes &&
!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
@@ -829,7 +873,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
if (RegInfo->needsStackRealignment(MF)) {
- assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ assert(hasFP(MF) && "dynamic stack realignment without a FP!");
if (isFixed) {
FrameReg = RegInfo->getFrameRegister(MF);
Offset = FPOffset;
@@ -936,18 +980,19 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
});
if (Regs.size() > 1 || StrOpc== 0) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
- .addReg(ARM::SP).setMIFlags(MIFlags));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
} else if (Regs.size() == 1) {
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
- ARM::SP)
- .addReg(Regs[0].first, getKillRegState(Regs[0].second))
- .addReg(ARM::SP).setMIFlags(MIFlags)
- .addImm(-4);
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP)
+ .setMIFlags(MIFlags)
+ .addImm(-4)
+ .add(predOps(ARMCC::AL));
}
Regs.clear();
@@ -1027,9 +1072,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
});
if (Regs.size() > 1 || LdrOpc == 0) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
- .addReg(ARM::SP));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet && MI != MBB.end()) {
@@ -1053,7 +1098,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
} else
MIB.addImm(4);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
Regs.clear();
@@ -1114,9 +1159,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// sub r4, sp, #numregs * 8
// The immediate is <= 64, so it doesn't need any special encoding.
unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addReg(ARM::SP)
- .addImm(8 * NumAlignedDPRCS2Regs)));
+ BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
// We must set parameter MustBeSingleInstruction to true, since
@@ -1132,10 +1179,10 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// Leave r4 live, it is used below.
Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
- .addReg(ARM::R4);
- MIB = AddDefaultPred(MIB);
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
if (!isThumb)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
// Now spill NumAlignedDPRCS2Regs registers starting from d8.
// r4 holds the stack slot address.
@@ -1147,11 +1194,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
- ARM::R4)
- .addReg(ARM::R4, RegState::Kill).addImm(16)
- .addReg(NextReg)
- .addReg(SupReg, RegState::ImplicitKill));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1165,9 +1213,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
- .addReg(ARM::R4).addImm(16).addReg(NextReg)
- .addReg(SupReg, RegState::ImplicitKill));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1177,8 +1228,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
- .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(SupReg)
+ .add(predOps(ARMCC::AL));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}
@@ -1187,9 +1241,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs) {
MBB.addLiveIn(NextReg);
// vstr.64 uses addrmode5 which has an offset scale of 4.
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
- .addReg(NextReg)
- .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4)
+ .addImm((NextReg - R4BaseReg) * 2)
+ .add(predOps(ARMCC::AL));
}
// The last spill instruction inserted should kill the scratch register r4.
@@ -1254,8 +1310,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addFrameIndex(D8SpillFI).addImm(0)));
+ BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
// Now restore NumAlignedDPRCS2Regs registers starting from d8.
unsigned NextReg = ARM::D8;
@@ -1264,10 +1323,12 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 6) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
- .addReg(ARM::R4, RegState::Define)
- .addReg(ARM::R4, RegState::Kill).addImm(16)
- .addReg(SupReg, RegState::ImplicitDefine));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1280,9 +1341,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 4) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
- .addReg(ARM::R4).addImm(16)
- .addReg(SupReg, RegState::ImplicitDefine));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1291,16 +1354,20 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
- .addReg(ARM::R4).addImm(16));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4)
+ .addImm(16)
+ .add(predOps(ARMCC::AL));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}
// Finally, use a vanilla vldr.64 for the remaining odd register.
if (NumAlignedDPRCS2Regs)
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
- .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4)
+ .addImm(2 * (NextReg - R4BaseReg))
+ .add(predOps(ARMCC::AL));
// Last store kills r4.
std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
@@ -1633,13 +1700,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// worth the effort and added fragility?
unsigned EstimatedStackSize =
MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
- if (hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
if (AFI->hasStackFrame())
EstimatedStackSize += 4;
} else {
// If FP is not used, SP will be used to access arguments, so count the
// size of arguments into the estimation.
- EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize();
+ EstimatedStackSize += AFI->getArgumentStackSize();
}
EstimatedStackSize += 16; // For possible paddings.
@@ -1650,7 +1718,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
- if (hasFP(MF)) {
+ if (HasFP) {
SavedRegs.set(FramePtr);
// If the frame pointer is required by the ABI, also spill LR so that we
// emit a complete frame record.
@@ -1658,11 +1726,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(ARM::LR);
LRSpilled = true;
NumGPRSpills++;
- auto LRPos = find(UnspilledCS1GPRs, ARM::LR);
+ auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
}
- auto FPPos = find(UnspilledCS1GPRs, FramePtr);
+ auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
if (FPPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(FPPos);
NumGPRSpills++;
@@ -1721,7 +1789,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// r7 can be used if it is not being used as the frame pointer.
- if (!hasFP(MF)) {
+ if (!HasFP) {
if (SavedRegs.test(ARM::R7)) {
--RegDeficit;
DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
@@ -1773,7 +1841,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
NumGPRSpills++;
CS1Spilled = true;
ExtraCSSpill = true;
- UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg));
+ UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
if (Reg == ARM::LR)
LRSpilled = true;
}
@@ -1786,7 +1854,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
- LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR);
+ LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
@@ -2081,12 +2149,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// SR1: Scratch Register #1
// push {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
- .addReg(ScratchReg0).addReg(ScratchReg1);
+ BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
- .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
- .addReg(ScratchReg0).addReg(ScratchReg1);
+ BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Emit the relevant DWARF information about the change in stack pointer as
@@ -2106,21 +2179,29 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// mov SR1, sp
if (Thumb) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
- .addReg(ARM::SP));
+ BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
} else if (CompareStackPointer) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
- .addReg(ARM::SP)).addReg(0);
+ BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// sub SR1, sp, #StackSize
if (!CompareStackPointer && Thumb) {
- AddDefaultPred(
- AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
- .addReg(ScratchReg1).addImm(AlignedStackSize));
+ BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
+ .add(condCodeOp())
+ .addReg(ScratchReg1)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
} else if (!CompareStackPointer) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
- .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
+ BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+ .addReg(ARM::SP)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
if (Thumb && ST->isThumb1Only()) {
@@ -2131,21 +2212,25 @@ void ARMFrameLowering::adjustForSegmentedStacks(
unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
// ldr SR0, [pc, offset(STACK_LIMIT)]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
- .addConstantPoolIndex(CPI));
+ BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+ .addConstantPoolIndex(CPI)
+ .add(predOps(ARMCC::AL));
// ldr SR0, [SR0]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
- .addReg(ScratchReg0).addImm(0));
+ BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
+ .addReg(ScratchReg0)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
} else {
// Get TLS base address from the coprocessor
// mrc p15, #0, SR0, c13, c0, #3
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
- .addImm(15)
- .addImm(0)
- .addImm(13)
- .addImm(0)
- .addImm(3));
+ BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+ .addImm(15)
+ .addImm(0)
+ .addImm(13)
+ .addImm(0)
+ .addImm(3)
+ .add(predOps(ARMCC::AL));
// Use the last tls slot on android and a private field of the TCP on linux.
assert(ST->isTargetAndroid() || ST->isTargetLinux());
@@ -2153,16 +2238,19 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Get the stack limit from the right offset
// ldr SR0, [sr0, #4 * TlsOffset]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
- .addReg(ScratchReg0).addImm(4 * TlsOffset));
+ BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
+ .addReg(ScratchReg0)
+ .addImm(4 * TlsOffset)
+ .add(predOps(ARMCC::AL));
}
// Compare stack limit with stack size requested.
// cmp SR0, SR1
Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1));
+ BuildMI(GetMBB, DL, TII.get(Opcode))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1)
+ .add(predOps(ARMCC::AL));
// This jump is taken if StackLimit < SP - stack required.
Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
@@ -2178,32 +2266,40 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Pass first argument for the __morestack by Scratch Register #0.
// The amount size of stack required
if (Thumb) {
- AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
- ScratchReg0)).addImm(AlignedStackSize));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
+ .add(condCodeOp())
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
- .addImm(AlignedStackSize)).addReg(0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// Pass second argument for the __morestack by Scratch Register #1.
// The amount size of stack consumed to save function arguments.
if (Thumb) {
- AddDefaultPred(
- AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
+ .add(condCodeOp())
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
- .addReg(0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// push {lr} - Save return address of this function.
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
.addReg(ARM::LR);
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
+ BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
.addReg(ARM::LR);
}
@@ -2220,7 +2316,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Call __morestack().
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
+ BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
+ .add(predOps(ARMCC::AL))
.addExternalSymbol("__morestack");
} else {
BuildMI(AllocMBB, DL, TII.get(ARM::BL))
@@ -2230,22 +2327,26 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// pop {lr} - Restore return address of this original function.
if (Thumb) {
if (ST->isThumb1Only()) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0);
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
- .addReg(ScratchReg0));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
+ .addReg(ScratchReg0)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
- .addReg(ARM::LR, RegState::Define)
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP)
- .addImm(4));
+ BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .addImm(4)
+ .add(predOps(ARMCC::AL));
}
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ARM::LR);
+ BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::LR);
}
// Restore SR0 and SR1 in case of __morestack() was called.
@@ -2253,15 +2354,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// scratch registers from here.
// pop {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Update the CFA offset now that we've popped
@@ -2271,20 +2374,22 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// bx lr - Return from this function.
Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));
+ BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL));
// Restore SR0 and SR1 in case of __morestack() was not called.
// pop {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Update the CFA offset now that we've popped
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c3e9591d5c70..b07b4e1f5cfb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -244,11 +244,8 @@ private:
bool tryInlineAsm(SDNode *N);
- void SelectConcatVector(SDNode *N);
void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
- bool trySMLAWSMULW(SDNode *N);
-
void SelectCMP_SWAP(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -2559,141 +2556,6 @@ bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
return false;
}
-static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
- bool Accumulate) {
- // For SM*WB, we need to some form of sext.
- // For SM*WT, we need to search for (sra X, 16)
- // Src1 then gets set to X.
- if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
- SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
- SignExt.getOpcode() == ISD::AssertSext) &&
- SignExt.getValueType() == MVT::i32) {
-
- *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
- Src1 = SignExt.getOperand(0);
- return true;
- }
-
- if (SignExt.getOpcode() != ISD::SRA)
- return false;
-
- ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
- if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
- return false;
-
- SDValue Op0 = SignExt.getOperand(0);
-
- // The sign extend operand for SM*WB could be generated by a shl and ashr.
- if (Op0.getOpcode() == ISD::SHL) {
- SDValue SHL = Op0;
- ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
- if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
- return false;
-
- *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
- Src1 = Op0.getOperand(0);
- return true;
- }
- *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
- Src1 = SignExt.getOperand(0);
- return true;
-}
-
-static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
- SDValue &Src1, bool Accumulate) {
- // First we look for:
- // (add (or (srl ?, 16), (shl ?, 16)))
- if (OR.getOpcode() != ISD::OR)
- return false;
-
- SDValue SRL = OR.getOperand(0);
- SDValue SHL = OR.getOperand(1);
-
- if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
- SRL = OR.getOperand(1);
- SHL = OR.getOperand(0);
- if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
- return false;
- }
-
- ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
- ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
- if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
- SHLSrc1->getZExtValue() != 16)
- return false;
-
- // The first operands to the shifts need to be the two results from the
- // same smul_lohi node.
- if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
- SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
- return false;
-
- SDNode *SMULLOHI = SRL.getOperand(0).getNode();
- if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
- SHL.getOperand(0) != SDValue(SMULLOHI, 1))
- return false;
-
- // Now we have:
- // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
- // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
- // For SMLAWB the 16-bit value will signed extended somehow.
- // For SMLAWT only the SRA is required.
-
- // Check both sides of SMUL_LOHI
- if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
- Src0 = SMULLOHI->getOperand(1);
- } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
- Accumulate)) {
- Src0 = SMULLOHI->getOperand(0);
- } else {
- return false;
- }
- return true;
-}
-
-bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
- if (!Subtarget->hasV6Ops() ||
- (Subtarget->isThumb() && !Subtarget->hasThumb2()))
- return false;
-
- SDLoc dl(N);
- SDValue Src0 = N->getOperand(0);
- SDValue Src1 = N->getOperand(1);
- SDValue A, B;
- unsigned Opc = 0;
-
- if (N->getOpcode() == ISD::ADD) {
- if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
- return false;
-
- SDValue Acc;
- if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
- Acc = Src1;
- } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
- Acc = Src0;
- } else {
- return false;
- }
- if (Opc == 0)
- return false;
-
- SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32) };
- CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
- return true;
- } else if (N->getOpcode() == ISD::OR &&
- SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
- if (Opc == 0)
- return false;
-
- SDValue Ops[] = { A, B, getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32)};
- CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
- return true;
- }
- return false;
-}
-
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
@@ -2722,15 +2584,6 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
- // The only time a CONCAT_VECTORS operation can have legal types is when
- // two 64-bit vectors are concatenated to a 128-bit vector.
- EVT VT = N->getValueType(0);
- if (!VT.is128BitVector() || N->getNumOperands() != 2)
- llvm_unreachable("unexpected CONCAT_VECTORS");
- ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
-}
-
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
@@ -2822,11 +2675,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD:
- case ISD::OR:
- if (trySMLAWSMULW(N))
- return;
- break;
case ISD::WRITE_REGISTER:
if (tryWriteRegister(N))
return;
@@ -3042,49 +2890,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ARMISD::VMOVRRD:
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
- N->getOperand(0), getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32)));
- return;
- case ISD::UMUL_LOHI: {
- if (Subtarget->isThumb1Only())
- break;
- if (Subtarget->isThumb()) {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(
- N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
- return;
- } else {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(N, CurDAG->getMachineNode(
- Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
- MVT::i32, MVT::i32, Ops));
- return;
- }
- }
- case ISD::SMUL_LOHI: {
- if (Subtarget->isThumb1Only())
- break;
- if (Subtarget->isThumb()) {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(
- N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
- return;
- } else {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(N, CurDAG->getMachineNode(
- Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
- MVT::i32, MVT::i32, Ops));
- return;
- }
- }
case ARMISD::UMAAL: {
unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
@@ -3095,38 +2900,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
case ARMISD::UMLAL:{
- // UMAAL is similar to UMLAL but it adds two 32-bit values to the
- // 64-bit multiplication result.
- if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
- N->getOperand(2).getOpcode() == ARMISD::ADDC &&
- N->getOperand(3).getOpcode() == ARMISD::ADDE) {
-
- SDValue Addc = N->getOperand(2);
- SDValue Adde = N->getOperand(3);
-
- if (Adde.getOperand(2).getNode() == Addc.getNode()) {
-
- ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
- ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
-
- if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
- {
- // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
- // RdLo = one operand to be added, lower 32-bits of res
- // RdHi = other operand to be added, upper 32-bits of res
- // Rn = first multiply operand
- // Rm = second multiply operand
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- Addc.getOperand(0), Addc.getOperand(1),
- getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32) };
- unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
- CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
- return;
- }
- }
- }
-
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
@@ -3277,26 +3050,23 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
int64_t Addend = -C->getSExtValue();
SDNode *Add = nullptr;
- // In T2 mode, ADDS can be better than CMN if the immediate fits in a
+ // ADDS can be better than CMN if the immediate fits in a
// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
// Outside that range we can just use a CMN which is 32-bit but has a
// 12-bit immediate range.
- if (Subtarget->isThumb2() && Addend < 1<<8) {
- SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
- } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
- // FIXME: Add T1 tADDi8 code.
- SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
- CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
- Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
- } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
- SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
- CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
- Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
+ if (Addend < 1<<8) {
+ if (Subtarget->isThumb2()) {
+ SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
+ } else {
+ unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
+ CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
+ }
}
if (Add) {
SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
@@ -4013,10 +3783,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
- case ISD::CONCAT_VECTORS:
- SelectConcatVector(N);
- return;
-
case ISD::ATOMIC_CMP_SWAP:
SelectCMP_SWAP(N);
return;
@@ -4123,11 +3889,10 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
- if (Flags.empty())
- return 0x2 | (int)hasDSP;
-
+static inline int getMClassFlagsMask(StringRef Flags) {
return StringSwitch<int>(Flags)
+ .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
+ // correct when flags are not permitted
.Case("g", 0x1)
.Case("nzcvq", 0x2)
.Case("nzcvqg", 0x3)
@@ -4170,7 +3935,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
}
// We know we are now handling a write so need to get the mask for the flags.
- int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
+ int Mask = getMClassFlagsMask(Flags);
// Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
// shouldn't have flags present.
@@ -4185,10 +3950,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
// The register was valid so need to put the mask in the correct place
// (the flags need to be in bits 11-10) and combine with the SYSmvalue to
// construct the operand for the instruction node.
- if (SYSmvalue < 0x4)
- return SYSmvalue | Mask << 10;
-
- return SYSmvalue;
+ return SYSmvalue | Mask << 10;
}
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
@@ -4201,7 +3963,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// The flags permitted for apsr are the same flags that are allowed in
// M class registers. We get the flag value and then shift the flags into
// the correct place to combine with the mask.
- Mask = getMClassFlagsMask(Flags, true);
+ Mask = getMClassFlagsMask(Flags);
if (Mask == -1)
return -1;
return Mask << 2;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0f84a2359160..e697c8ca5339 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,47 +12,101 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMISelLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
-#include "ARMTargetMachine.h"
-#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <string>
#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "arm-isel"
@@ -82,21 +136,6 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
-namespace {
- class ARMCCState : public CCState {
- public:
- ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
- ParmContext PC)
- : CCState(CC, isVarArg, MF, locs, C) {
- assert(((PC == Call) || (PC == Prologue)) &&
- "ARMCCState users must specify whether their context is call"
- "or prologue generation.");
- CallOrPrologue = PC;
- }
- };
-}
-
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -685,10 +724,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
- // ARM and Thumb2 support UMLAL/SMLAL.
- if (!Subtarget->isThumb1Only())
- setTargetDAGCombine(ISD::ADDC);
-
if (Subtarget->isFPOnlySP()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
@@ -787,13 +822,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
- if (!Subtarget->isThumb1Only()) {
- // FIXME: We should do this for Thumb1 as well.
- setOperationAction(ISD::ADDC, MVT::i32, Custom);
- setOperationAction(ISD::ADDE, MVT::i32, Custom);
- setOperationAction(ISD::SUBC, MVT::i32, Custom);
- setOperationAction(ISD::SUBE, MVT::i32, Custom);
- }
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@@ -1305,6 +1337,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
+ case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
+ case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
+ case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
+ case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
+ case ARMISD::SMULWB: return "ARMISD::SMULWB";
+ case ARMISD::SMULWT: return "ARMISD::SMULWT";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
@@ -1414,6 +1452,40 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Lowering Code
//===----------------------------------------------------------------------===//
+static bool isSRL16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SRL)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+static bool isSRA16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SRA)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+static bool isSHL16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SHL)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+// Check for a signed 16-bit value. We special case SRA because it makes it
+// more simple when also looking for SRAs that aren't sign extending a
+// smaller value. Without the check, we'd need to take extra care with
+// checking order for some operations.
+static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
+ if (isSRA16(Op))
+ return isSHL16(Op.getOperand(0));
+ return DAG.ComputeNumSignBits(Op) == 17;
+}
+
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
@@ -1433,22 +1505,34 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
- ARMCC::CondCodes &CondCode2) {
+ ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
CondCode2 = ARMCC::AL;
+ InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
- case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETOEQ:
+ CondCode = ARMCC::EQ;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
- case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETONE:
+ CondCode = ARMCC::MI;
+ CondCode2 = ARMCC::GT;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
- case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUEQ:
+ CondCode = ARMCC::EQ;
+ CondCode2 = ARMCC::VS;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
@@ -1456,7 +1540,10 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
- case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ case ISD::SETUNE:
+ CondCode = ARMCC::NE;
+ InvalidOnQNaN = false;
+ break;
}
}
@@ -1549,8 +1636,8 @@ SDValue ARMTargetLowering::LowerCallResult(
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
@@ -1710,8 +1797,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
@@ -2088,10 +2175,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// this.
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
unsigned Align) const {
- assert((State->getCallOrPrologue() == Prologue ||
- State->getCallOrPrologue() == Call) &&
- "unhandled ParmContext");
-
// Byval (as with any stack) slots are always at least 4 byte aligned.
Align = std::max(Align, 4U);
@@ -2148,7 +2231,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
- int FI = INT_MAX;
+ int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
@@ -2178,7 +2261,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
} else
return false;
- assert(FI != INT_MAX);
+ assert(FI != std::numeric_limits<int>::max());
if (!MFI.isFixedObjectIndex(FI))
return false;
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
@@ -2260,7 +2343,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
+ CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
@@ -2362,8 +2445,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
@@ -2790,9 +2873,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
// FIXME: is there useful debug info available here?
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2935,7 +3018,7 @@ static bool isSimpleType(Type *T) {
}
static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
- EVT PtrVT, SDLoc dl) {
+ EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
// to save ourselves an indirection.
@@ -2980,7 +3063,8 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
- if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
+ if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
+ Size == 0)
return SDValue();
unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
@@ -3080,15 +3164,22 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
return Result;
} else if (Subtarget->isRWPI() && !IsRO) {
// SB-relative.
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue G = DAG.getLoad(
- PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ SDValue RelAddr;
+ if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ ++NumMovwMovt;
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
+ RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
+ } else { // use literal pool for address constant
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ RelAddr = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ }
SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
- SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
return Result;
}
@@ -3462,8 +3553,8 @@ SDValue ARMTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Prologue);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
SmallVector<SDValue, 16> ArgValues;
@@ -3595,7 +3686,6 @@ SDValue ARMTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
-
// sanity check
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
@@ -3734,13 +3824,15 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const SDLoc &dl) const {
+ SelectionDAG &DAG, const SDLoc &dl,
+ bool InvalidOnQNaN) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
+ SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
@@ -3757,10 +3849,12 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
+ Cmp.getOperand(1), Cmp.getOperand(2));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
+ Cmp.getOperand(1));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
@@ -3808,7 +3902,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
return std::make_pair(Value, OverflowCmp);
}
-
SDValue
ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
@@ -3832,7 +3925,6 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
-
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
@@ -4025,7 +4117,6 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
// Additionally, the variable is returned in parameter V and the constant in K.
static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
uint64_t &K) {
-
SDValue LHS1 = Op.getOperand(0);
SDValue RHS1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
@@ -4046,10 +4137,10 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
// in each conditional
SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
? &RHS1
- : NULL;
+ : nullptr;
SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
? &RHS2
- : NULL;
+ : nullptr;
SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
@@ -4073,13 +4164,15 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
const SDValue *LowerCheckOp =
isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
- : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
- : NULL;
+ : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+ ? &Op2
+ : nullptr;
const SDValue *UpperCheckOp =
isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
- : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
- : NULL;
+ : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+ ? &Op2
+ : nullptr;
if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
return false;
@@ -4104,7 +4197,6 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
}
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -4162,7 +4254,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
@@ -4181,13 +4274,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
@@ -4348,10 +4441,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
@@ -4853,9 +4947,10 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op);
- SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
- DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
- MVT::i32));
+ SDValue Ops[] = { DAG.getEntryNode(),
+ DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
+
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
@@ -5584,7 +5679,6 @@ static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
return true;
}
-
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
@@ -6027,10 +6121,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (ValueCounts.size() != 1)
usesOnlyOneValue = false;
- if (!Value.getNode() && ValueCounts.size() > 0)
+ if (!Value.getNode() && !ValueCounts.empty())
Value = ValueCounts.begin()->first;
- if (ValueCounts.size() == 0)
+ if (ValueCounts.empty())
return DAG.getUNDEF(VT);
// Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
@@ -6182,8 +6276,8 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
struct ShuffleSourceInfo {
SDValue Vec;
- unsigned MinElt;
- unsigned MaxElt;
+ unsigned MinElt = std::numeric_limits<unsigned>::max();
+ unsigned MaxElt = 0;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
@@ -6192,13 +6286,12 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Code should guarantee that element i in Vec starts at element "WindowBase
// + i * WindowScale in ShuffleVec".
- int WindowBase;
- int WindowScale;
+ int WindowBase = 0;
+ int WindowScale = 1;
+
+ ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
- ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
- WindowScale(1) {}
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -6220,7 +6313,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- auto Source = find(Sources, SourceVec);
+ auto Source = llvm::find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
@@ -6336,7 +6429,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
if (Entry.isUndef())
continue;
- auto Src = find(Sources, Entry.getOperand(0));
+ auto Src = llvm::find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
@@ -6633,7 +6726,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT SubVT = SubV1.getValueType();
// We expect these to have been canonicalized to -1.
- assert(all_of(ShuffleMask, [&](int i) {
+ assert(llvm::all_of(ShuffleMask, [&](int i) {
return i < (int)VT.getVectorNumElements();
}) && "Unexpected shuffle index into UNDEF operand!");
@@ -6896,8 +6989,19 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
N->getValueType(0),
N->getOpcode());
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
- return SkipLoadExtensionForVMULL(LD, DAG);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
+ "Expected extending load");
+
+ SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
+ unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue extLoad =
+ DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
+
+ return newLoad;
+ }
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
@@ -7258,9 +7362,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
ArgListEntry Entry;
Entry.Node = SRet;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isSRet = true;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsSRet = true;
Args.push_back(Entry);
RetTy = Type::getVoidTy(*DAG.getContext());
}
@@ -7268,8 +7372,8 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
@@ -7480,12 +7584,12 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
Entry.Node = Val;
Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isZExt = true;
+ Entry.IsZExt = true;
Args.push_back(Entry);
Entry.Node = Exponent;
Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isZExt = true;
+ Entry.IsZExt = true;
Args.push_back(Entry);
Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
@@ -7702,24 +7806,27 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// add r5, pc
// str r5, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
// Set the low bit because of thumb mode.
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
- .addReg(NewVReg1, RegState::Kill)
- .addImm(0x01)));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
.addReg(NewVReg2, RegState::Kill)
.addImm(PCLabelId);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
- .addReg(NewVReg3, RegState::Kill)
- .addFrameIndex(FI)
- .addImm(36) // &jbuf[1] :: pc
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
} else if (isThumb) {
// Incoming value: jbuf
// ldr.n r1, LCPI1_4
@@ -7729,51 +7836,58 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// add r2, $jbuf, #+4 ; &jbuf[1]
// str r1, [r2]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId);
// Set the low bit because of thumb mode.
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
- .addReg(ARM::CPSR, RegState::Define)
- .addImm(1));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3, RegState::Kill));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
.addFrameIndex(FI)
.addImm(36); // &jbuf[1] :: pc
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
- .addReg(NewVReg4, RegState::Kill)
- .addReg(NewVReg5, RegState::Kill)
- .addImm(0)
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
} else {
// Incoming value: jbuf
// ldr r1, LCPI1_1
// add r1, pc, r1
// str r1, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addImm(0)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
- .addReg(NewVReg1, RegState::Kill)
- .addImm(PCLabelId));
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
- .addReg(NewVReg2, RegState::Kill)
- .addFrameIndex(FI)
- .addImm(36) // &jbuf[1] :: pc
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
}
}
@@ -7791,7 +7905,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
- DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
@@ -7886,31 +8000,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size())
+ .add(predOps(ARMCC::AL));
} else {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(NumLPads & 0xFFFF));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF)
+ .add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
- .addReg(VReg1)
- .addImm(NumLPads >> 16));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16)
+ .add(predOps(ARMCC::AL));
}
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg2));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
@@ -7919,16 +8038,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
.addReg(NewVReg4, RegState::Kill)
@@ -7936,15 +8056,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addJumpTableIndex(MJTI);
} else if (Subtarget->isThumb()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
- .addFrameIndex(FI)
- .addImm(1)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
- .addReg(NewVReg1)
- .addImm(NumLPads));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -7957,12 +8079,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
- .addReg(VReg1, RegState::Define)
- .addConstantPoolIndex(Idx));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
- .addReg(NewVReg1)
- .addReg(VReg1));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
@@ -7971,37 +8095,42 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg1)
- .addImm(2));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3));
+ BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3)
+ .add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
- .addReg(NewVReg4, RegState::Kill)
- .addImm(0)
- .addMemOperand(JTMMOLd));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg6 = NewVReg5;
if (IsPositionIndependent) {
NewVReg6 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg3));
+ BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
@@ -8009,31 +8138,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addJumpTableIndex(MJTI);
} else {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
- .addReg(NewVReg1)
- .addImm(NumLPads));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads)
+ .add(predOps(ARMCC::AL));
} else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
- .addImm(NumLPads & 0xFFFF));
+ BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF)
+ .add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
- .addReg(VReg1)
- .addImm(NumLPads >> 16));
+ BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16)
+ .add(predOps(ARMCC::AL));
}
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg2));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -8046,13 +8180,15 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
- .addReg(VReg1, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg1, RegState::Kill));
+ BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
@@ -8061,23 +8197,25 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
- .addReg(NewVReg1)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
- .addReg(NewVReg3, RegState::Kill)
- .addReg(NewVReg4)
- .addImm(0)
- .addMemOperand(JTMMOLd));
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
+ .addImm(0)
+ .addMemOperand(JTMMOLd)
+ .add(predOps(ARMCC::AL));
if (IsPositionIndependent) {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
@@ -8222,26 +8360,35 @@ static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
assert(LdOpc != 0 && "Should have a load opcode");
if (LdSize >= 8) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addImm(0));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// load + update AddrIn
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrIn).addImm(0));
- MachineInstrBuilder MIB =
- BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(AddrIn).addImm(LdSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
+ .add(t1CondCodeOp())
+ .addReg(AddrIn)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb2) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addImm(LdSize));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
} else { // arm
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addReg(0).addImm(LdSize));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addReg(0)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
}
}
@@ -8254,24 +8401,36 @@ static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
assert(StOpc != 0 && "Should have a store opcode");
if (StSize >= 8) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(AddrIn).addImm(0).addReg(Data));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn)
+ .addImm(0)
+ .addReg(Data)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// store + update AddrIn
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
- .addReg(AddrIn).addImm(0));
- MachineInstrBuilder MIB =
- BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(AddrIn).addImm(StSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, Pos, dl, TII->get(StOpc))
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
+ .add(t1CondCodeOp())
+ .addReg(AddrIn)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb2) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(Data).addReg(AddrIn).addImm(StSize));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
} else { // arm
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(Data).addReg(AddrIn).addReg(0)
- .addImm(StSize));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addReg(0)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
}
}
@@ -8402,16 +8561,15 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16),
- Vtmp).addImm(LoopSize & 0xFFFF));
+ BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF)
+ .add(predOps(ARMCC::AL));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16),
- varEnd)
- .addReg(Vtmp)
- .addImm(LoopSize >> 16));
+ BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -8424,11 +8582,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
if (IsThumb)
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
- varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL));
else
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
- varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
+ BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
}
BB->addSuccessor(loopMBB);
@@ -8465,16 +8628,19 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// Decrement loop variable by UnitSize.
if (IsThumb1) {
- MachineInstrBuilder MIB =
- BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(varPhi).addImm(UnitSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
+ .add(t1CondCodeOp())
+ .addReg(varPhi)
+ .addImm(UnitSize)
+ .add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, BB->end(), dl,
TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB.addReg(varPhi)
+ .addImm(UnitSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
MIB->getOperand(5).setReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
}
@@ -8545,11 +8711,12 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__chkstk")
- .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Define)
- .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ .add(predOps(ARMCC::AL))
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12,
+ RegState::Implicit | RegState::Define | RegState::Dead);
break;
case CodeModel::Large:
case CodeModel::JITDefault: {
@@ -8559,20 +8726,22 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(Reg, RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Define)
- .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ .add(predOps(ARMCC::AL))
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12,
+ RegState::Implicit | RegState::Define | RegState::Dead);
break;
}
}
- AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
- ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)));
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
MI.eraseFromParent();
return MBB;
@@ -8597,9 +8766,10 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
MF->push_back(TrapBB);
MBB->addSuccessor(TrapBB);
- AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
- .addReg(MI.getOperand(0).getReg())
- .addImm(0));
+ BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
+ .addReg(MI.getOperand(0).getReg())
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::EQ)
@@ -8617,18 +8787,18 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
bool isThumb2 = Subtarget->isThumb2();
switch (MI.getOpcode()) {
default: {
- MI.dump();
+ MI.print(errs());
llvm_unreachable("Unexpected instr type to insert");
}
// Thumb1 post-indexed loads are really just single-register LDMs.
case ARM::tLDR_postidx: {
BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
- .addOperand(MI.getOperand(1)) // Rn_wb
- .addOperand(MI.getOperand(2)) // Rn
- .addOperand(MI.getOperand(3)) // PredImm
- .addOperand(MI.getOperand(4)) // PredReg
- .addOperand(MI.getOperand(0)); // Rt
+ .add(MI.getOperand(1)) // Rn_wb
+ .add(MI.getOperand(2)) // Rn
+ .add(MI.getOperand(3)) // PredImm
+ .add(MI.getOperand(4)) // PredReg
+ .add(MI.getOperand(0)); // Rt
MI.eraseFromParent();
return BB;
}
@@ -8659,12 +8829,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineMemOperand *MMO = *MI.memoperands_begin();
BuildMI(*BB, MI, dl, TII->get(NewOpc))
- .addOperand(MI.getOperand(0)) // Rn_wb
- .addOperand(MI.getOperand(1)) // Rt
- .addOperand(MI.getOperand(2)) // Rn
- .addImm(Offset) // offset (skip GPR==zero_reg)
- .addOperand(MI.getOperand(5)) // pred
- .addOperand(MI.getOperand(6))
+ .add(MI.getOperand(0)) // Rn_wb
+ .add(MI.getOperand(1)) // Rt
+ .add(MI.getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .add(MI.getOperand(5)) // pred
+ .add(MI.getOperand(6))
.addMemOperand(MMO);
MI.eraseFromParent();
return BB;
@@ -8681,7 +8851,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
for (unsigned i = 0; i < MI.getNumOperands(); ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
MI.eraseFromParent();
return BB;
}
@@ -8754,18 +8924,20 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned LHS1 = MI.getOperand(1).getReg();
unsigned LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
unsigned RHS1 = MI.getOperand(3).getReg();
unsigned RHS2 = MI.getOperand(4).getReg();
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1)
+ .addReg(RHS1)
+ .add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS2).addReg(RHS2)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
@@ -8779,7 +8951,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
if (isThumb2)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ BuildMI(BB, dl, TII->get(ARM::t2B))
+ .addMBB(exitMBB)
+ .add(predOps(ARMCC::AL));
else
BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
@@ -8842,9 +9016,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
RSBBB->addSuccessor(SinkBB);
// insert a cmp at the end of BB
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(ABSSrcReg).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
@@ -8855,9 +9030,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Note: BCC and rsbri will be converted into predicated rsbmi
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
- TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
- .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
// insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
@@ -8927,19 +9104,45 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
+ unsigned ccOutIdx;
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
- assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
- "converted opcode should be the same except for cc_out");
+ assert(MCID->getNumOperands() ==
+ MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
+ && "converted opcode should be the same except for cc_out"
+ " (and, on Thumb1, pred)");
MI.setDesc(*MCID);
// Add the optional cc_out operand
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
- }
- unsigned ccOutIdx = MCID->getNumOperands() - 1;
+
+ // On Thumb1, move all input operands to the end, then add the predicate
+ if (Subtarget->isThumb1Only()) {
+ for (unsigned c = MCID->getNumOperands() - 4; c--;) {
+ MI.addOperand(MI.getOperand(1));
+ MI.RemoveOperand(1);
+ }
+
+ // Restore the ties
+ for (unsigned i = MI.getNumOperands(); i--;) {
+ const MachineOperand& op = MI.getOperand(i);
+ if (op.isReg() && op.isUse()) {
+ int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ MI.tieOperands(DefIdx, i);
+ }
+ }
+
+ MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
+ MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
+ ccOutIdx = 1;
+ } else
+ ccOutIdx = MCID->getNumOperands() - 1;
+ } else
+ ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
@@ -8970,7 +9173,9 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (deadCPSR) {
assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
- return;
+ // Thumb1 instructions must have the S bit even if the CPSR is dead.
+ if (!Subtarget->isThumb1Only())
+ return;
}
// If this instruction was defined with an optional CPSR def and its dag node
@@ -9032,7 +9237,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
SDLoc dl(N);
EVT VT = N->getValueType(0);
CC = N->getOperand(0);
- if (CC.getValueType() != MVT::i1)
+ if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
return false;
Invert = !AllOnes;
if (AllOnes)
@@ -9308,10 +9513,90 @@ static SDValue findMUL_LOHI(SDValue V) {
return SDValue();
}
-static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ if (Subtarget->isThumb()) {
+ if (!Subtarget->hasDSP())
+ return SDValue();
+ } else if (!Subtarget->hasV5TEOps())
+ return SDValue();
+
+ // SMLALBB, SMLALBT, SMLALTB and SMLALTT multiply two 16-bit values and
+ // accumulate the product into a 64-bit value. The 16-bit values will
+ // have been sign-extended somehow or SRA'd into 32-bit values:
+ // (addc (adde (mul 16bit, 16bit), lo), hi)
+ SDValue Mul = AddcNode->getOperand(0);
+ SDValue Lo = AddcNode->getOperand(1);
+ if (Mul.getOpcode() != ISD::MUL) {
+ Lo = AddcNode->getOperand(0);
+ Mul = AddcNode->getOperand(1);
+ if (Mul.getOpcode() != ISD::MUL)
+ return SDValue();
+ }
+
+ SDValue SRA = AddeNode->getOperand(0);
+ SDValue Hi = AddeNode->getOperand(1);
+ if (SRA.getOpcode() != ISD::SRA) {
+ SRA = AddeNode->getOperand(1);
+ Hi = AddeNode->getOperand(0);
+ if (SRA.getOpcode() != ISD::SRA)
+ return SDValue();
+ }
+ if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
+ if (Const->getZExtValue() != 31)
+ return SDValue();
+ } else
+ return SDValue();
+
+ if (SRA.getOperand(0) != Mul)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(AddcNode);
+ unsigned Opcode = 0;
+ SDValue Op0;
+ SDValue Op1;
+
+ if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
+ Opcode = ARMISD::SMLALBB;
+ Op0 = Mul.getOperand(0);
+ Op1 = Mul.getOperand(1);
+ } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
+ Opcode = ARMISD::SMLALBT;
+ Op0 = Mul.getOperand(0);
+ Op1 = Mul.getOperand(1).getOperand(0);
+ } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
+ Opcode = ARMISD::SMLALTB;
+ Op0 = Mul.getOperand(0).getOperand(0);
+ Op1 = Mul.getOperand(1);
+ } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
+ Opcode = ARMISD::SMLALTT;
+ Op0 = Mul->getOperand(0).getOperand(0);
+ Op1 = Mul->getOperand(1).getOperand(0);
+ }
+
+ if (!Op0 || !Op1)
+ return SDValue();
+
+ SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ Op0, Op1, Lo, Hi);
+ // Replace the ADD nodes' uses with the SMLAL node's values.
+ SDValue HiMLALResult(SMLAL.getNode(), 1);
+ SDValue LoMLALResult(SMLAL.getNode(), 0);
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+ // Return original node to notify the driver to stop replacing.
+ SDValue resNode(AddcNode, 0);
+ return resNode;
+}
+
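For reference, a minimal standalone sketch of the arithmetic the SMLALBB/BT/TB/TT combine above targets: a signed 16x16 multiply whose 32-bit product is accumulated into a 64-bit Hi:Lo pair. Illustrative only, not part of the patch; the helper name is made up.

#include <cassert>
#include <cstdint>

// Models SMLALBB: both 16-bit operands come from the bottom halves of
// 32-bit registers; the product is accumulated into RdHi:RdLo.
static uint64_t smlalbb(uint32_t rn, uint32_t rm, uint32_t lo, uint32_t hi) {
  int64_t prod = (int64_t)(int16_t)(rn & 0xFFFF) * (int16_t)(rm & 0xFFFF);
  uint64_t acc = ((uint64_t)hi << 32) | lo;
  return acc + (uint64_t)prod; // 64-bit wrap-around accumulate
}

int main() {
  // (-3) * 7 accumulated into 10 yields -11 interpreted as a 64-bit value.
  assert(smlalbb(0xFFFD, 7, 10, 0) == (uint64_t)-11);
  return 0;
}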
+static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
-
// Look for multiply add opportunities.
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
// each add nodes consumes a value from ISD::UMUL_LOHI and there is
@@ -9326,7 +9611,17 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// \ /
// ADDC <- hiAdd
//
- assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+ assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
+
+ assert(AddeNode->getNumOperands() == 3 &&
+ AddeNode->getOperand(2).getValueType() == MVT::i32 &&
+ "ADDE node has the wrong inputs");
+
+ // Check that we have a glued ADDC node.
+ SDNode* AddcNode = AddeNode->getOperand(2).getNode();
+ if (AddcNode->getOpcode() != ARMISD::ADDC)
+ return SDValue();
+
SDValue AddcOp0 = AddcNode->getOperand(0);
SDValue AddcOp1 = AddcNode->getOperand(1);
@@ -9338,29 +9633,13 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
AddcNode->getValueType(0) == MVT::i32 &&
"Expect ADDC with two result values. First: i32");
- // Check that we have a glued ADDC node.
- if (AddcNode->getValueType(1) != MVT::Glue)
- return SDValue();
-
- // Check that the ADDC adds the low result of the S/UMUL_LOHI.
+ // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
+ // may be an SMLAL which multiplies two 16-bit values.
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
AddcOp1->getOpcode() != ISD::SMUL_LOHI)
- return SDValue();
-
- // Look for the glued ADDE.
- SDNode* AddeNode = AddcNode->getGluedUser();
- if (!AddeNode)
- return SDValue();
-
- // Make sure it is really an ADDE.
- if (AddeNode->getOpcode() != ISD::ADDE)
- return SDValue();
-
- assert(AddeNode->getNumOperands() == 3 &&
- AddeNode->getOperand(2).getValueType() == MVT::Glue &&
- "ADDE node has the wrong inputs");
+ return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);
// Check for the triangle shape.
SDValue AddeOp0 = AddeNode->getOperand(0);
@@ -9435,38 +9714,25 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
// Return original node to notify the driver to stop replacing.
- SDValue resNode(AddcNode, 0);
- return resNode;
+ return SDValue(AddeNode, 0);
}
-static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// UMAAL is similar to UMLAL except that it adds two unsigned values.
// While trying to combine for the other MLAL nodes, first search for the
- // chance to use UMAAL. Check if Addc uses another addc node which can first
- // be combined into a UMLAL. The other pattern is AddcNode being combined
- // into an UMLAL and then using another addc is handled in ISelDAGToDAG.
-
- if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
- (Subtarget->isThumb() && !Subtarget->hasThumb2()))
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
-
- SDNode *PrevAddc = nullptr;
- if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
- PrevAddc = AddcNode->getOperand(0).getNode();
- else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
- PrevAddc = AddcNode->getOperand(1).getNode();
-
- // If there's no addc chains, just return a search for any MLAL.
- if (PrevAddc == nullptr)
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
-
- // Try to convert the addc operand to an MLAL and if that fails try to
- // combine AddcNode.
- SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
- if (MLAL != SDValue(PrevAddc, 0))
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+ // chance to use UMAAL. Check if Addc uses a node which has already
+ // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
+ // as the addend, and it's handled in PerformUMLALCombine.
+
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+ return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
+
+ // Check that we have a glued ADDC node.
+ SDNode* AddcNode = AddeNode->getOperand(2).getNode();
+ if (AddcNode->getOpcode() != ARMISD::ADDC)
+ return SDValue();
// Find the converted UMAAL or quit if it doesn't exist.
SDNode *UmlalNode = nullptr;
@@ -9478,29 +9744,18 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
UmlalNode = AddcNode->getOperand(1).getNode();
AddHi = AddcNode->getOperand(0);
} else {
- return SDValue();
+ return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
}
// The ADDC should be glued to an ADDE node, which uses the same UMLAL as
// the ADDC as well as Zero.
- auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
-
- if (!Zero || Zero->getZExtValue() != 0)
+ if (!isNullConstant(UmlalNode->getOperand(3)))
return SDValue();
- // Check that we have a glued ADDC node.
- if (AddcNode->getValueType(1) != MVT::Glue)
- return SDValue();
-
- // Look for the glued ADDE.
- SDNode* AddeNode = AddcNode->getGluedUser();
- if (!AddeNode)
- return SDValue();
-
- if ((AddeNode->getOperand(0).getNode() == Zero &&
+ if ((isNullConstant(AddeNode->getOperand(0)) &&
AddeNode->getOperand(1).getNode() == UmlalNode) ||
(AddeNode->getOperand(0).getNode() == UmlalNode &&
- AddeNode->getOperand(1).getNode() == Zero)) {
+ isNullConstant(AddeNode->getOperand(1)))) {
SelectionDAG &DAG = DCI.DAG;
SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
@@ -9513,19 +9768,84 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
// Return original node to notify the driver to stop replacing.
- return SDValue(AddcNode, 0);
+ return SDValue(AddeNode, 0);
}
return SDValue();
}
-/// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
-/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
-static SDValue PerformADDCCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
+static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+ return SDValue();
+
+ // Check that we have a pair of ADDC and ADDE as operands.
+ // Both addends of the ADDE must be zero.
+ SDNode* AddcNode = N->getOperand(2).getNode();
+ SDNode* AddeNode = N->getOperand(3).getNode();
+ if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
+ (AddeNode->getOpcode() == ARMISD::ADDE) &&
+ isNullConstant(AddeNode->getOperand(0)) &&
+ isNullConstant(AddeNode->getOperand(1)) &&
+ (AddeNode->getOperand(2).getNode() == AddcNode))
+ return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::i32),
+ {N->getOperand(0), N->getOperand(1),
+ AddcNode->getOperand(0), AddcNode->getOperand(1)});
+ else
+ return SDValue();
+}
+
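For context, a standalone model of what UMAAL computes, and why folding the ADDC/ADDE-of-zero pair into it is safe: Rn*Rm + RdLo + RdHi never exceeds 64 bits. Illustrative sketch only, not LLVM code.

#include <cassert>
#include <cstdint>

// Models UMAAL: RdHi:RdLo = Rn * Rm + RdHi + RdLo (all inputs unsigned 32-bit).
static uint64_t umaal(uint32_t rn, uint32_t rm, uint32_t lo, uint32_t hi) {
  return (uint64_t)rn * rm + lo + hi;
}

int main() {
  uint32_t m = 0xFFFFFFFFu;
  // Worst case: (2^32-1)^2 + 2*(2^32-1) == 2^64-1 exactly fills 64 bits.
  assert(umaal(m, m, m, m) == ~0ull);
  return 0;
}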
+static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (Subtarget->isThumb1Only()) {
+ SDValue RHS = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int32_t imm = C->getSExtValue();
+ if (imm < 0 && imm > INT_MIN) {
+ SDLoc DL(N);
+ RHS = DAG.getConstant(-imm, DL, MVT::i32);
+ unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
+ : ARMISD::ADDC;
+ return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
+ }
+ }
+ }
+ return SDValue();
+}
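A small self-contained check of the identity the Thumb1 ADDC/SUBC rewrite above relies on, and of why imm == INT_MIN is excluded (its magnitude is not representable as a positive 32-bit value). Illustrative only.

#include <cassert>
#include <climits>
#include <cstdint>

int main() {
  uint32_t a = 100;
  int32_t imm = -42; // imm < 0 && imm > INT_MIN, as in the combine
  // Adding a negative immediate gives the same 32-bit result as subtracting
  // its magnitude.
  assert(a + (uint32_t)imm == a - (uint32_t)(-imm));
  // INT_MIN is skipped: negating it would overflow a signed 32-bit value.
  assert(-(int64_t)INT_MIN > (int64_t)INT_MAX);
  return 0;
}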
- if (Subtarget->isThumb1Only()) return SDValue();
+static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (Subtarget->isThumb1Only()) {
+ SDValue RHS = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int64_t imm = C->getSExtValue();
+ if (imm < 0) {
+ SDLoc DL(N);
+
+ // The with-carry-in form matches bitwise not instead of the negation.
+ // Effectively, the inverse interpretation of the carry flag already
+ // accounts for part of the negation.
+ RHS = DAG.getConstant(~imm, DL, MVT::i32);
+
+ unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
+ : ARMISD::ADDE;
+ return DAG.getNode(Opcode, DL, N->getVTList(),
+ N->getOperand(0), RHS, N->getOperand(2));
+ }
+ }
+ }
+ return SDValue();
+}
+
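Likewise, a minimal check of the bitwise-not trick in the ADDE/SUBE rewrite, assuming the ARM convention that a subtract-with-carry computes a - b - (1 - carry). Illustrative only.

#include <cassert>
#include <cstdint>

static uint32_t adc(uint32_t a, uint32_t b, uint32_t c) { return a + b + c; }
static uint32_t sbc(uint32_t a, uint32_t b, uint32_t c) { return a - b - (1u - c); }

int main() {
  uint32_t a = 12345;
  int32_t imm = -7;
  // Adding a negative immediate with carry-in equals subtracting ~imm with the
  // same carry-in, because ~imm + 1 == -imm in two's complement.
  for (uint32_t c = 0; c <= 1; ++c)
    assert(adc(a, (uint32_t)imm, c) == sbc(a, ~(uint32_t)imm, c));
  return 0;
}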
+/// PerformADDECombine - Target-specific dag combine transform from
+/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
+/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
+static SDValue PerformADDECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Only ARM and Thumb2 support UMLAL/SMLAL.
+ if (Subtarget->isThumb1Only())
+ return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
// Only perform the checks after legalize when the pattern is available.
if (DCI.isBeforeLegalize()) return SDValue();
@@ -9722,7 +10042,6 @@ static SDValue PerformMULCombine(SDNode *N,
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
-
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
@@ -9761,6 +10080,67 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
+// Try combining OR nodes to SMULWB, SMULWT.
+static SDValue PerformORCombineToSMULWBT(SDNode *OR,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasV6Ops() ||
+ (Subtarget->isThumb() &&
+ (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
+ return SDValue();
+
+ SDValue SRL = OR->getOperand(0);
+ SDValue SHL = OR->getOperand(1);
+
+ if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
+ SRL = OR->getOperand(1);
+ SHL = OR->getOperand(0);
+ }
+ if (!isSRL16(SRL) || !isSHL16(SHL))
+ return SDValue();
+
+ // The first operands to the shifts need to be the two results from the
+ // same smul_lohi node.
+ if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
+ SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
+ return SDValue();
+
+ SDNode *SMULLOHI = SRL.getOperand(0).getNode();
+ if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
+ SHL.getOperand(0) != SDValue(SMULLOHI, 1))
+ return SDValue();
+
+ // Now we have:
+ // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
+ // For SMULW[B|T], smul_lohi will take a 32-bit and a 16-bit argument.
+ // For SMULWB the 16-bit value will have been sign-extended somehow.
+ // For SMULWT only the SRA is required.
+ // Check both sides of SMUL_LOHI
+ SDValue OpS16 = SMULLOHI->getOperand(0);
+ SDValue OpS32 = SMULLOHI->getOperand(1);
+
+ SelectionDAG &DAG = DCI.DAG;
+ if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
+ OpS16 = OpS32;
+ OpS32 = SMULLOHI->getOperand(0);
+ }
+
+ SDLoc dl(OR);
+ unsigned Opcode = 0;
+ if (isS16(OpS16, DAG))
+ Opcode = ARMISD::SMULWB;
+ else if (isSRA16(OpS16)) {
+ Opcode = ARMISD::SMULWT;
+ OpS16 = OpS16->getOperand(0);
+ }
+ else
+ return SDValue();
+
+ SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
+ return SDValue(OR, 0);
+}
+
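For reference, a standalone model of SMULWB/SMULWT (illustrative only; the helper names are made up): a signed 32x16 multiply whose 48-bit product is truncated to its top 32 bits. This is also why only one 16-bit half of the second operand is demanded in the SimplifyDemandedBits cases added further down.

#include <cassert>
#include <cstdint>

static int32_t smulwb(int32_t rn, uint32_t rm) {
  int64_t prod = (int64_t)rn * (int16_t)(rm & 0xFFFF); // bottom half of rm
  return (int32_t)(prod >> 16);                        // keep bits [47:16]
}

static int32_t smulwt(int32_t rn, uint32_t rm) {
  int64_t prod = (int64_t)rn * (int16_t)(rm >> 16);    // top half of rm
  return (int32_t)(prod >> 16);
}

int main() {
  // Only the selected half of rm participates; the other half is ignored.
  assert(smulwb(1 << 16, 0xDEAD0005u) == 5);
  assert(smulwt(1 << 16, 0x0005DEADu) == 5);
  return 0;
}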
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -9798,6 +10178,8 @@ static SDValue PerformORCombine(SDNode *N,
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
+ if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
+ return Result;
}
// The code below optimizes (or (and X, Y), Z).
@@ -9906,7 +10288,7 @@ static SDValue PerformORCombine(SDNode *N,
(Mask == ~Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
- if (Subtarget->hasT2ExtractPack() &&
+ if (Subtarget->hasDSP() &&
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
@@ -9922,7 +10304,7 @@ static SDValue PerformORCombine(SDNode *N,
(~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
- if (Subtarget->hasT2ExtractPack() &&
+ if (Subtarget->hasDSP() &&
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
@@ -11324,8 +11706,8 @@ static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
if (Op.getOpcode() == ARMISD::CMOV) {
APInt KZ2(KnownZero.getBitWidth(), 0);
APInt KO2(KnownOne.getBitWidth(), 0);
- computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
- computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
+ computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2);
KnownZero &= KZ2;
KnownOne &= KO2;
@@ -11555,13 +11937,17 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
+ case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
+ case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
+ case ARMISD::ADDC:
+ case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
@@ -11593,6 +11979,56 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ARMISD::SMULWB: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMULWT: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALBB: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALBT: {
+ unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
+ APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
+ unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
+ APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALTB: {
+ unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
+ APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
+ unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
+ APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALTT: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
+ return SDValue();
+ break;
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -12180,6 +12616,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = KnownOne.getBitWidth();
@@ -12588,8 +13025,8 @@ static TargetLowering::ArgListTy getDivRemArgList(
Type *ArgTy = ArgVT.getTypeForEVT(*Context);
Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
if (Subtarget->isTargetWindows() && Args.size() >= 2)
@@ -12861,7 +13298,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::arm_stlexd:
- case Intrinsic::arm_strexd: {
+ case Intrinsic::arm_strexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(2);
@@ -12871,9 +13308,9 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = false;
Info.writeMem = true;
return true;
- }
+
case Intrinsic::arm_ldaexd:
- case Intrinsic::arm_ldrexd: {
+ case Intrinsic::arm_ldrexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
@@ -12883,7 +13320,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = true;
Info.writeMem = false;
return true;
- }
+
default:
break;
}
@@ -12921,7 +13358,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
- Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
+ Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
Builder.getInt32(0), Builder.getInt32(7),
Builder.getInt32(10), Builder.getInt32(5)};
@@ -12932,7 +13369,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
llvm_unreachable("makeDMB on a target so old that it has no barriers");
}
} else {
- Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+ Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
// Only a full system barrier exists in the M-class architectures.
Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
Constant *CDomain = Builder.getInt32(Domain);
@@ -13089,7 +13526,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
if (ValTy->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+ Function *Ldrex = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
@@ -13106,7 +13543,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+ Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldrex, Addr),
@@ -13118,7 +13555,7 @@ void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
if (!Subtarget->hasV7Ops())
return;
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
}
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
@@ -13154,6 +13591,39 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Addr});
}
+/// A helper function for determining the number of interleaved accesses we
+/// will generate when lowering accesses of the given type.
+unsigned
+ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const {
+ return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+}
+
+bool ARMTargetLowering::isLegalInterleavedAccessType(
+ VectorType *VecTy, const DataLayout &DL) const {
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+ // Ensure the vector doesn't have f16 elements. Even though we could do an
+ // i16 vldN, we can't hold the f16 vectors and will end up converting via
+ // f32.
+ if (VecTy->getElementType()->isHalfTy())
+ return false;
+
+ // Ensure the number of vector elements is greater than 1.
+ if (VecTy->getNumElements() < 2)
+ return false;
+
+ // Ensure the element type is legal.
+ if (ElSize != 8 && ElSize != 16 && ElSize != 32)
+ return false;
+
+ // Ensure the total vector size is 64 or a multiple of 128. Types larger than
+ // 128 will be split into multiple interleaved accesses.
+ return VecSize == 64 || VecSize % 128 == 0;
+}
+
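A small standalone sketch of the sizing rule the two helpers above encode: accesses are generated in 128-bit chunks, rounding up, and only 64-bit or 128-bit-multiple vectors of 8/16/32-bit elements are treated as legal. Illustrative only.

#include <cassert>

// Mirrors the ceil division in getNumInterleavedAccesses: 128-bit chunks.
static unsigned numAccesses(unsigned vecBits) { return (vecBits + 127) / 128; }

int main() {
  assert(numAccesses(64) == 1);   // e.g. <8 x i8>: one vldN/vstN
  assert(numAccesses(128) == 1);  // e.g. <4 x i32>
  assert(numAccesses(256) == 2);  // e.g. <8 x i32>: split into two accesses
  assert(numAccesses(512) == 4);  // e.g. <16 x i32>
  return 0;
}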
/// \brief Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -13178,64 +13648,97 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vldN doesn't support i64/f64 elements).
- if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON or if the vector type is illegal. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // the vector size is divisible by 128 bits.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
+ unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ IRBuilder<> Builder(LI);
+
+ // The base address of the load.
+ Value *BaseAddr = LI->getPointerOperand();
+
+ if (NumLoads > 1) {
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ VecTy = VectorType::get(VecTy->getVectorElementType(),
+ VecTy->getVectorNumElements() / NumLoads);
+
+ // We will compute the pointer operand of each load from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, VecTy->getVectorElementType()->getPointerTo(
+ LI->getPointerAddressSpace()));
+ }
+
+ assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+ Type *Tys[] = {VecTy, Int8Ptr};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- IRBuilder<> Builder(LI);
- SmallVector<Value *, 2> Ops;
+ // Holds sub-vectors extracted from the load intrinsic return values. The
+ // sub-vectors are associated with the shufflevector instructions they will
+ // replace.
+ DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
- Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
- Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
- Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
- Type *Tys[] = { VecTy, Int8Ptr };
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+ // If we're generating more than one load, compute the base address of
+ // subsequent loads as an offset from the previous.
+ if (LoadCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(
+ BaseAddr, VecTy->getVectorNumElements() * Factor);
- // Replace uses of each shufflevector with the corresponding vector loaded
- // by ldN.
- for (unsigned i = 0; i < Shuffles.size(); i++) {
- ShuffleVectorInst *SV = Shuffles[i];
- unsigned Index = Indices[i];
+ SmallVector<Value *, 2> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+ Ops.push_back(Builder.getInt32(LI->getAlignment()));
- Value *SubVec = Builder.CreateExtractValue(VldN, Index);
+ CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
- // Convert the integer vector to pointer vector if the element is pointer.
- if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+ // Replace uses of each shufflevector with the corresponding vector loaded
+ // by ldN.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SV = Shuffles[i];
+ unsigned Index = Indices[i];
- SV->replaceAllUsesWith(SubVec);
- }
+ Value *SubVec = Builder.CreateExtractValue(VldN, Index);
- return true;
-}
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
-/// \brief Get a mask consisting of sequential integers starting from \p Start.
-///
-/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
-static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
- unsigned NumElts) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < NumElts; i++)
- Mask.push_back(Builder.getInt32(Start + i));
+ SubVecs[SV].push_back(SubVec);
+ }
+ }
+
+ // Replace uses of the shufflevector instructions with the sub-vectors
+ // returned by the load intrinsic. If a shufflevector instruction is
+ // associated with more than one sub-vector, those sub-vectors will be
+ // concatenated into a single wide vector.
+ for (ShuffleVectorInst *SVI : Shuffles) {
+ auto &SubVec = SubVecs[SVI];
+ auto *WideVec =
+ SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+ SVI->replaceAllUsesWith(WideVec);
+ }
- return ConstantVector::get(Mask);
+ return true;
}
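To make the splitting above concrete (the widths here are assumptions for illustration, not taken from the patch): a factor-2 load whose de-interleaved results are <8 x i32> is 256 bits wide, so NumLoads = 2; the sub-vector type is reset to <4 x i32>, and each subsequent vld2 is addressed 4 * 2 = 8 elements past the previous one. A small sketch of that offset arithmetic:

// Sketch of the per-load GEP stride used above (assumed shapes: factor-2
// load, <8 x i32> wide results split into <4 x i32> sub-vectors).
#include <cstdio>

int main() {
  unsigned Factor = 2;      // de-interleave factor (vld2)
  unsigned SubVecElts = 4;  // elements per sub-vector after the split
  unsigned NumLoads = 2;    // 256-bit wide type -> two 128-bit accesses
  unsigned EltBytes = 4;    // i32 elements
  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
    // Matches the incremental CreateConstGEP1_32(BaseAddr, SubVecElts * Factor).
    unsigned EltOffset = LoadCount * SubVecElts * Factor;
    std::printf("vld2 #%u: element offset %u, byte offset %u\n", LoadCount,
                EltOffset, EltOffset * EltBytes);
  }
  return 0;
}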
/// \brief Lower an interleaved store into a vstN intrinsic.
@@ -13279,15 +13782,15 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vstN doesn't support i64/f64 elements).
- if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
- EltIs64Bits)
+  // Skip if we do not have NEON or if the vector type is not legal for
+  // interleaved accesses. We can "legalize" wide vector types into multiple
+  // interleaved accesses as long as the vector size is divisible by 128.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
@@ -13306,44 +13809,75 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, LaneLen);
}
+ // The base address of the store.
+ Value *BaseAddr = SI->getPointerOperand();
+
+ if (NumStores > 1) {
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+
+ // We will compute the pointer operand of each store from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
+ SI->getPointerAddressSpace()));
+ }
+
+ assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
+
+ auto Mask = SVI->getShuffleMask();
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+ Type *Tys[] = {Int8Ptr, SubVecTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
- SmallVector<Value *, 6> Ops;
- Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
- Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+ for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
- Type *Tys[] = { Int8Ptr, SubVecTy };
- Function *VstNFunc = Intrinsic::getDeclaration(
- SI->getModule(), StoreInts[Factor - 2], Tys);
+ // If we're generating more than one store, compute the base address of
+ // subsequent stores as an offset from the previous.
+ if (StoreCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
- // Split the shufflevector operands into sub vectors for the new vstN call.
- auto Mask = SVI->getShuffleMask();
- for (unsigned i = 0; i < Factor; i++) {
- if (Mask[i] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
- } else {
- unsigned StartMask = 0;
- for (unsigned j = 1; j < LaneLen; j++) {
- if (Mask[j*Factor + i] >= 0) {
- StartMask = Mask[j*Factor + i] - j;
- break;
+ SmallVector<Value *, 6> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+
+ Function *VstNFunc =
+ Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+ // Split the shufflevector operands into sub vectors for the new vstN call.
+ for (unsigned i = 0; i < Factor; i++) {
+ unsigned IdxI = StoreCount * LaneLen * Factor + i;
+ if (Mask[IdxI] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ unsigned IdxJ = StoreCount * LaneLen * Factor + j;
+ if (Mask[IdxJ * Factor + IdxI] >= 0) {
+ StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+ break;
+ }
}
+ // Note: If all elements in a chunk are undefs, StartMask=0!
+ // Note: Filling undef gaps with random elements is ok, since
+ // those elements were being written anyway (with undefs).
+ // In the case of all undefs we default to using elements from 0.
+ // Note: StartMask cannot be negative; it's checked in
+ // isReInterleaveMask.
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
- // Note: If all elements in a chunk are undefs, StartMask=0!
- // Note: Filling undef gaps with random elements is ok, since
- // those elements were being written anyway (with undefs).
- // In the case of all undefs we're defaulting to using elems from 0
- // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
}
- }
- Ops.push_back(Builder.getInt32(SI->getAlignment()));
- Builder.CreateCall(VstNFunc, Ops);
+ Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Builder.CreateCall(VstNFunc, Ops);
+ }
return true;
}
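For the store side, the only subtle change is the shuffle-mask indexing: each split vstN chunk starts consulting the original mask at StoreCount * LaneLen * Factor. A sketch with assumed sizes (factor 3, LaneLen 4 after the split, two stores):

// Sketch of the mask positions visited by each split vstN call
// (assumed sizes: Factor = 3, LaneLen = 4 after the split, NumStores = 2).
#include <cstdio>

int main() {
  unsigned Factor = 3, LaneLen = 4, NumStores = 2;
  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount)
    for (unsigned i = 0; i < Factor; ++i)
      // Matches IdxI = StoreCount * LaneLen * Factor + i in the loop above.
      std::printf("vst%u call %u, operand %u -> mask index %u\n",
                  Factor, StoreCount, i, StoreCount * LaneLen * Factor + i);
  return 0;
}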
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 84c6eb845bb8..70a0b1380ec9 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -175,9 +175,15 @@ class InstrItineraryData;
VMULLs, // ...signed
VMULLu, // ...unsigned
+ SMULWB, // Signed multiply word by half word, bottom
+ SMULWT, // Signed multiply word by half word, top
UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply
UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
+ SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
+ SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
+ SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
+ SMLALTT, // 64-bit signed accumulate multiply top, top 16
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
@@ -346,6 +352,7 @@ class InstrItineraryData;
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
@@ -500,9 +507,18 @@ class InstrItineraryData;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
+ bool canMergeStoresTo(EVT MemVT) const override {
+ // Do not merge to larger than i32.
+ return (MemVT.getSizeInBits() <= 32);
+ }
+
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
bool supportSwiftError() const override {
return true;
}
@@ -514,6 +530,17 @@ class InstrItineraryData;
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
+ /// Returns true if \p VecTy is a legal interleaved access type. This
+ /// function checks the vector element type and the overall width of the
+ /// vector.
+ bool isLegalInterleavedAccessType(VectorType *VecTy,
+ const DataLayout &DL) const;
+
+ /// Returns the number of interleaved accesses that will be generated when
+ /// lowering accesses of the given type.
+ unsigned getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -698,7 +725,7 @@ class InstrItineraryData;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- const SDLoc &dl) const;
+ const SDLoc &dl, bool InvalidOnQNaN) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 488439fc24e0..1bbe7f0d275e 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -184,7 +184,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
// ARM special operands for disassembly only.
//
-def SetEndAsmOperand : ImmAsmOperand {
+def SetEndAsmOperand : ImmAsmOperand<0,1> {
let Name = "SetEndImm";
let ParserMethod = "parseSetEndImm";
}
@@ -221,25 +221,25 @@ def banked_reg : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
-def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
+def shr_imm8_asm_operand : ImmAsmOperand<1,8> { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
let ParserMatchClass = shr_imm8_asm_operand;
}
-def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
+def shr_imm16_asm_operand : ImmAsmOperand<1,16> { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
let ParserMatchClass = shr_imm16_asm_operand;
}
-def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
+def shr_imm32_asm_operand : ImmAsmOperand<1,32> { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
let ParserMatchClass = shr_imm32_asm_operand;
}
-def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
+def shr_imm64_asm_operand : ImmAsmOperand<1,64> { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
@@ -261,10 +261,19 @@ def const_pool_asm_imm : Operand<i32> {
// Note: When EmitPriority == 1, the alias will be used for printing
class ARMInstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsARM]>;
+class ARMInstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsARM,UseNegativeImmediates]>;
class tInstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb]>;
+class tInstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsThumb,UseNegativeImmediates]>;
class t2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb2]>;
+class t2InstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsThumb2,UseNegativeImmediates]>;
class VFP2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2]>;
class VFP2DPInstAlias<string Asm, dag Result, bit EmitPriority = 0>
@@ -948,7 +957,7 @@ class ADivA1I<bits<3> opcod, dag oops, dag iops,
}
// PKH instructions
-def PKHLSLAsmOperand : ImmAsmOperand {
+def PKHLSLAsmOperand : ImmAsmOperand<0,31> {
let Name = "PKHLSLImm";
let ParserMethod = "parsePKHLSLImm";
}
@@ -1013,9 +1022,6 @@ class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> {
class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP];
}
-class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack];
-}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 27b64322dfa9..3b3606ef462a 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -129,8 +129,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
- MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .add(predOps(ARMCC::AL));
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c47393990e97..cc0e7d4d9c35 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -51,6 +51,8 @@ def SDT_ARMAnd : SDTypeProfile<1, 2,
SDTCisVT<2, i32>]>;
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
@@ -90,6 +92,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
+def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameAs<0, 4>,
+ SDTCisSameAs<0, 5>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
@@ -181,6 +190,13 @@ def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
SDNPMayStore, SDNPMayLoad]>;
+def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>;
+def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>;
+def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>;
+def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
+def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
+def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -247,9 +263,6 @@ def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
-def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
- AssemblerPredicate<"FeatureT2XtPk",
- "pack/extract">;
def HasDSP : Predicate<"Subtarget->hasDSP()">,
AssemblerPredicate<"FeatureDSP", "dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
@@ -298,6 +311,11 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
+def UseNegativeImmediates :
+ Predicate<"false">,
+ AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
@@ -423,7 +441,16 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
//
// Immediate operands with a shared generic asm render method.
-class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+class ImmAsmOperand<int Low, int High> : AsmOperandClass {
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImmediate<" # Low # "," # High # ">";
+ let DiagnosticType = "ImmRange" # Low # "_" # High;
+}
+
+class ImmAsmOperandMinusOne<int Low, int High> : AsmOperandClass {
+ let PredicateMethod = "isImmediate<" # Low # "," # High # ">";
+ let DiagnosticType = "ImmRange" # Low # "_" # High;
+}
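The new Low/High template parameters feed a generated PredicateMethod string of the form isImmediate<Low,High>; presumably this binds to a templated range check on the parsed operand in the ARM asm parser, along the lines of the following sketch (an assumption, since the parser side is not shown in this hunk):

// Hypothetical sketch of the range predicate named by
// PredicateMethod = "isImmediate<Low,High>"; the real asm-parser member
// operates on an MCExpr operand rather than a raw integer.
#include <cassert>
#include <cstdint>

template <int64_t Low, int64_t High>
bool isImmediate(int64_t Value) {
  return Value >= Low && Value <= High;
}

int main() {
  assert(isImmediate<1, 32>(32));  // a shr_imm32-style operand accepts 32
  assert(!isImmediate<1, 32>(0));  // but rejects 0, unlike the old catch-all class
  return 0;
}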
// Operands that are part of a memory addressing mode.
class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; }
@@ -645,35 +672,45 @@ def arm_i32imm : PatLeaf<(imm), [{
}]>;
/// imm0_1 predicate - Immediate in the range [0,1].
-def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
/// imm0_3 predicate - Immediate in the range [0,3].
-def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def Imm0_3AsmOperand: ImmAsmOperand<0,3> { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
/// imm0_7 predicate - Immediate in the range [0,7].
-def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
+def Imm0_7AsmOperand: ImmAsmOperand<0,7> {
+ let Name = "Imm0_7";
+}
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
}]> {
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8_255 predicate - Immediate in the range [8,255].
+def Imm8_255AsmOperand: ImmAsmOperand<8,255> { let Name = "Imm8_255"; }
+def imm8_255 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]> {
+ let ParserMatchClass = Imm8_255AsmOperand;
+}
+
/// imm8 predicate - Immediate is exactly 8.
-def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; }
def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
let ParserMatchClass = Imm8AsmOperand;
}
/// imm16 predicate - Immediate is exactly 16.
-def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def Imm16AsmOperand: ImmAsmOperand<16,16> { let Name = "Imm16"; }
def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
let ParserMatchClass = Imm16AsmOperand;
}
/// imm32 predicate - Immediate is exactly 32.
-def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def Imm32AsmOperand: ImmAsmOperand<32,32> { let Name = "Imm32"; }
def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
let ParserMatchClass = Imm32AsmOperand;
}
@@ -681,25 +718,25 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>;
/// imm1_7 predicate - Immediate in the range [1,7].
-def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def Imm1_7AsmOperand: ImmAsmOperand<1,7> { let Name = "Imm1_7"; }
def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
let ParserMatchClass = Imm1_7AsmOperand;
}
/// imm1_15 predicate - Immediate in the range [1,15].
-def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def Imm1_15AsmOperand: ImmAsmOperand<1,15> { let Name = "Imm1_15"; }
def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
let ParserMatchClass = Imm1_15AsmOperand;
}
/// imm1_31 predicate - Immediate in the range [1,31].
-def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def Imm1_31AsmOperand: ImmAsmOperand<1,31> { let Name = "Imm1_31"; }
def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
let ParserMatchClass = Imm1_31AsmOperand;
}
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: ImmAsmOperand {
+def Imm0_15AsmOperand: ImmAsmOperand<0,15> {
let Name = "Imm0_15";
let DiagnosticType = "ImmRange0_15";
}
@@ -710,7 +747,7 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
+def Imm0_31AsmOperand: ImmAsmOperand<0,31> { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
}]> {
@@ -718,15 +755,15 @@ def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
-def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def Imm0_32AsmOperand: ImmAsmOperand<0,32> { let Name = "Imm0_32"; }
def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 32;
+ return Imm >= 0 && Imm < 33;
}]> {
let ParserMatchClass = Imm0_32AsmOperand;
}
/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
-def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def Imm0_63AsmOperand: ImmAsmOperand<0,63> { let Name = "Imm0_63"; }
def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 64;
}]> {
@@ -734,7 +771,7 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_239 predicate - Immediate in the range [0,239].
-def Imm0_239AsmOperand : ImmAsmOperand {
+def Imm0_239AsmOperand : ImmAsmOperand<0,239> {
let Name = "Imm0_239";
let DiagnosticType = "ImmRange0_239";
}
@@ -743,13 +780,13 @@ def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> {
}
/// imm0_255 predicate - Immediate in the range [0,255].
-def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
+def Imm0_255AsmOperand : ImmAsmOperand<0,255> { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
let ParserMatchClass = Imm0_255AsmOperand;
}
-/// imm0_65535 - An immediate is in the range [0.65535].
-def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+/// imm0_65535 - An immediate is in the range [0,65535].
+def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; }
def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 65536;
}]> {
@@ -767,19 +804,23 @@ def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
// FIXME: This really needs a Thumb version separate from the ARM version.
// While the range is the same, and can thus use the same match class,
// the encoding is different so it should have a different encoder method.
-def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
+def Imm0_65535ExprAsmOperand: AsmOperandClass {
+ let Name = "Imm0_65535Expr";
+ let RenderMethod = "addImmOperands";
+}
+
def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
-def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; }
+def Imm256_65535ExprAsmOperand: ImmAsmOperand<256,65535> { let Name = "Imm256_65535Expr"; }
def imm256_65535_expr : Operand<i32> {
let ParserMatchClass = Imm256_65535ExprAsmOperand;
}
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
-def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
+def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm <= 0xffffff;
}]> {
@@ -808,7 +849,9 @@ def imm1_32_XFORM: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
MVT::i32);
}]>;
-def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def Imm1_32AsmOperand: ImmAsmOperandMinusOne<1,32> {
+ let Name = "Imm1_32";
+}
def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue();
return Imm > 0 && Imm <= 32;
@@ -822,7 +865,7 @@ def imm1_16_XFORM: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
MVT::i32);
}]>;
-def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
+def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; }
def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
imm1_16_XFORM> {
let PrintMethod = "printImmPlusOneOperand";
@@ -3850,6 +3893,7 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm,
let Inst{11-0} = imm;
}
+let AddedComplexity = 1 in
def : ARMPat<(and GPR:$src, mod_imm_not:$imm),
(BICri GPR:$src, mod_imm_not:$imm)>;
@@ -3899,7 +3943,8 @@ def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b0000;
let Unpredictable{15-12} = 0b1111;
}
@@ -3910,14 +3955,16 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
4, IIC_iMUL32,
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
(MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6, UseMulOps]>;
+ Requires<[IsARM, NoV6, UseMulOps]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>,
- Requires<[IsARM, HasV6, UseMulOps]> {
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
bits<4> Ra;
let Inst{15-12} = Ra;
}
@@ -3928,12 +3975,14 @@ def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd),
pred:$p, cc_out:$s), 4, IIC_iMAC32,
[(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))],
(MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
- Requires<[IsARM, HasV6T2, UseMulOps]> {
+ Requires<[IsARM, HasV6T2, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -3949,26 +3998,38 @@ let hasSideEffects = 0 in {
let isCommutable = 1 in {
def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
- "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (smullohi GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
- "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (umullohi GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>;
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- 4, IIC_iMUL64, [],
+ 4, IIC_iMUL64,
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (smullohi GPR:$Rn, GPR:$Rm))],
(SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- 4, IIC_iMUL64, [],
+ 4, IIC_iMUL64,
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (umullohi GPR:$Rn, GPR:$Rm))],
(UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
}
}
@@ -3976,17 +4037,20 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rm;
@@ -4004,13 +4068,15 @@ def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
4, IIC_iMAC64, [],
(SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
4, IIC_iMAC64, [],
(UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
}
} // hasSideEffects
@@ -4019,13 +4085,15 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b1111;
}
def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b1111;
}
@@ -4033,57 +4101,67 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6, UseMulOps]>;
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6, UseMulOps]>;
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sext_inreg GPR:$Rm, i16)))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sra GPR:$Rm, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sext_inreg GPR:$Rm, i16)))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sra GPR:$Rm, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsARM, HasV5TE]>;
+ [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsARM, HasV5TE]>;
+ [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
}
@@ -4095,7 +4173,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd, (add GPR:$Ra,
(mul (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4103,7 +4182,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4111,7 +4191,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4119,19 +4200,24 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
}
}
@@ -4139,25 +4225,28 @@ defm SMUL : AI_smul<"smul">;
defm SMLA : AI_smla<"smla">;
// Halfword multiply accumulate long: SMLAL<x><y>.
-def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
+class SMLAL<bits<2> opc1, string asm>
+ : AMulxyI64<0b0001010, opc1,
+ (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
+
+def SMLALBB : SMLAL<0b00, "smlalbb">;
+def SMLALBT : SMLAL<0b10, "smlalbt">;
+def SMLALTB : SMLAL<0b01, "smlaltb">;
+def SMLALTT : SMLAL<0b11, "smlaltt">;
+
+def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALBB $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALBT $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALTB $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALTT $Rn, $Rm, $RLo, $RHi)>;
// Helper class for AI_smld.
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
@@ -4203,19 +4292,23 @@ multiclass AI_smld<bit sub, string opc> {
def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
(ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
- !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
(ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
- !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
}
@@ -4225,9 +4318,11 @@ defm SMLS : AI_smld<1, "smls">;
multiclass AI_sdml<bit sub, string opc> {
def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
}
defm SMUA : AI_sdml<0, "smua">;
@@ -4239,12 +4334,14 @@ defm SMUS : AI_sdml<1, "smus">;
def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasDivideInARM]>;
+ Requires<[IsARM, HasDivideInARM]>,
+ Sched<[WriteDIV]>;
def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasDivideInARM]>;
+ Requires<[IsARM, HasDivideInARM]>,
+ Sched<[WriteDIV]>;
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
@@ -4831,14 +4928,15 @@ let AddedComplexity = 8 in {
def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
}
-// SWP/SWPB are deprecated in V6/V7.
+// SWP/SWPB are deprecated in V6/V7 and optional in v7VE.
+// FIXME Use InstAlias to generate LDREX/STREX pairs instead.
let mayLoad = 1, mayStore = 1 in {
def SWP : AIswp<0, (outs GPRnopc:$Rt),
(ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def SWPB: AIswp<1, (outs GPRnopc:$Rt),
(ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
}
//===----------------------------------------------------------------------===//
@@ -4850,7 +4948,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
bits<4> opc1;
bits<4> CRn;
bits<4> CRd;
@@ -4872,7 +4970,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
bits<4> CRn;
@@ -5048,13 +5146,13 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
-defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
@@ -5132,7 +5230,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -5140,7 +5238,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -5183,7 +5281,7 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern = []>
: ABXI<0b1100, oops, iops, NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -5525,20 +5623,26 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>;
+ (SMULBB GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
+ (SMULBT GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>;
+ (SMULTB GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
// Pre-v7 uses MCR for synchronization barriers.
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
@@ -5717,33 +5821,49 @@ def : MnemonicAlias<"usubaddx", "usax">;
// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
-def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+def : ARMInstSubst<"mov${s}${p} $Rd, $imm",
(MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+def : ARMInstSubst<"mvn${s}${p} $Rd, $imm",
(MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
-def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"bic${s}${p} $Rd, $Rn, $imm",
(ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+def : ARMInstSubst<"bic${s}${p} $Rdn, $imm",
(ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"and${s}${p} $Rd, $Rn, $imm",
(BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+def : ARMInstSubst<"and${s}${p} $Rdn, $imm",
(BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, mod_imm_neg" -> sub
-def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"add${s}${p} $Rd, $Rn, $imm",
(SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+def : ARMInstSubst<"add${s}${p} $Rd, $imm",
(SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Likewise, "sub Rd, mod_imm_neg" -> add
+def : ARMInstSubst<"sub${s}${p} $Rd, $Rn, $imm",
+ (ADDri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sub${s}${p} $Rd, $imm",
+ (ADDri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+
+
+def : ARMInstSubst<"adc${s}${p} $Rd, $Rn, $imm",
+ (SBCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"adc${s}${p} $Rdn, $imm",
+ (SBCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sbc${s}${p} $Rd, $Rn, $imm",
+ (ADCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sbc${s}${p} $Rdn, $imm",
+ (ADCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+
// Same for CMP <--> CMN via mod_imm_neg
-def : ARMInstAlias<"cmp${p} $Rd, $imm",
+def : ARMInstSubst<"cmp${p} $Rd, $imm",
(CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
-def : ARMInstAlias<"cmn${p} $Rd, $imm",
+def : ARMInstSubst<"cmn${p} $Rd, $imm",
(CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index b5fa8e999e2a..681e235d78f0 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -7139,6 +7139,17 @@ let Predicates = [IsBE] in {
(f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}
+def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+
//===----------------------------------------------------------------------===//
// Assembler aliases
//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index a681f64b05e6..f2f426e86701 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -19,7 +19,7 @@ def imm_sr_XFORM: SDNodeXForm<imm, [{
unsigned Imm = N->getZExtValue();
return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
}]>;
-def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; }
def imm_sr : Operand<i32>, PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue();
return Imm > 0 && Imm <= 32;
@@ -28,22 +28,31 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = ThumbSRImmAsmOperand;
}
-def imm_comp_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
- MVT::i32);
-}]>;
-
def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;
+def ThumbModImmNeg1_7AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg1_7"; }
+def mod_imm1_7_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return 0 < Value && Value < 8;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ThumbModImmNeg1_7AsmOperand;
+}
+
+def ThumbModImmNeg8_255AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg8_255"; }
+def mod_imm8_255_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return 7 < Value && Value < 256;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ThumbModImmNeg8_255AsmOperand;
+}
+
+
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getZExtValue()) < 256;
}]>;
-def imm8_255 : ImmLeaf<i32, [{
- return Imm >= 8 && Imm < 256;
-}]>;
def imm8_255_neg : PatLeaf<(i32 imm), [{
unsigned Val = -N->getZExtValue();
return Val >= 8 && Val < 256;
@@ -407,9 +416,9 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
let DecoderMethod = "DecodeThumbAddSPImm";
}
-def : tInstAlias<"add${p} sp, $imm",
+def : tInstSubst<"add${p} sp, $imm",
(tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
-def : tInstAlias<"add${p} sp, sp, $imm",
+def : tInstSubst<"add${p} sp, sp, $imm",
(tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
// Can optionally specify SP as a three operand instruction.
@@ -910,7 +919,7 @@ let isAdd = 1 in {
def tADC : // A8.6.2
T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
"adc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+ []>, Sched<[WriteALU]>;
// Add immediate
def tADDi3 : // A8.6.4 T1
@@ -938,6 +947,43 @@ let isAdd = 1 in {
"add", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+ /// Similar to the above except these set the 's' bit so the
+ /// instruction modifies the CPSR register.
+ ///
+ /// These opcodes will be converted to the real non-S opcodes by
+ /// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+ let hasPostISelHook = 1, Defs = [CPSR] in {
+ let isCommutable = 1 in
+ def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm,
+ CPSR))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tADDSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rm,
+ imm0_7:$imm3))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rdn, CPSR, (ARMaddc tGPR:$Rn,
+ imm8_255:$imm8))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ let isCommutable = 1 in
+ def tADDSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rn,
+ tGPR:$Rm))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+ }
+
let hasSideEffects = 0 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
@@ -951,6 +997,12 @@ let isAdd = 1 in {
}
}
+def : tInstSubst<"sub${s}${p} $rd, $rn, $imm",
+ (tADDi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+def : tInstSubst<"sub${s}${p} $rdn, $imm",
+ (tADDi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
+
// AND register
let isCommutable = 1 in
def tAND : // A8.6.12
@@ -1197,7 +1249,7 @@ def tSBC : // A8.6.151
T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sbc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+ []>,
Sched<[WriteALU]>;
// Subtract immediate
@@ -1218,6 +1270,14 @@ def tSUBi8 : // A8.6.210 T2
[(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>,
Sched<[WriteALU]>;
+def : tInstSubst<"add${s}${p} $rd, $rn, $imm",
+ (tSUBi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+
+
+def : tInstSubst<"add${s}${p} $rdn, $imm",
+ (tSUBi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
+
// Subtract register
def tSUBrr : // A8.6.212
T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1226,6 +1286,41 @@ def tSUBrr : // A8.6.212
[(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
Sched<[WriteALU]>;
+/// Similar to the above except these set the 's' bit so the
+/// instruction modifies the CPSR register.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+ def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm,
+ CPSR))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rm,
+ imm0_7:$imm3))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rdn, CPSR, (ARMsubc tGPR:$Rn,
+ imm8_255:$imm8))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rn,
+ tGPR:$Rm))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+}
+
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1386,22 +1481,6 @@ def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
(tCMPr tGPR:$Rn, tGPR:$Rm)>;
-// Add with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
- (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
- (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
- (tADDrr tGPR:$lhs, tGPR:$rhs)>;
-
-// Subtract with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
- (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
- (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
-def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
- (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
-
// Bswap 16 with load/store
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
@@ -1477,7 +1556,7 @@ def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
// post-inc LDR -> LDM r0!, {r1}. The way operands are layed out in LDMs is
// different to how ISel expects them for a post-inc load, so use a pseudo
// and expand it just after ISel.
-let usesCustomInserter = 1,
+let usesCustomInserter = 1, mayLoad = 1,
Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
(ins rGPR:$Rn, pred:$p),
@@ -1547,7 +1626,7 @@ def : T1Pat<(i32 thumb_immshifted:$src),
(thumb_immshifted_shamt imm:$src))>;
def : T1Pat<(i32 imm0_255_comp:$src),
- (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+ (tMVN (tMOVi8 (imm_not_XFORM imm:$src)))>;
def : T1Pat<(i32 imm256_510:$src),
(tADDi8 (tMOVi8 255),
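// A minimal sketch of the effect of the tInstSubst records above, assuming
// the substituted form is only selected when the written immediate does not
// fit the named mnemonic:
//
//   adds r0, r1, #-2    ->  encoded as tSUBi3   ("subs r0, r1, #2")
//   add  sp, sp, #-8    ->  encoded as tSUBspi  ("sub  sp, sp, #8")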
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 603d66403e65..f5b673b78ad7 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -76,7 +76,11 @@ def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
+def t2_so_imm_asmoperand : AsmOperandClass {
+ let Name = "T2SOImm";
+ let RenderMethod = "addImmOperands";
+
+}
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getT2SOImmVal(Imm) != -1;
}]> {
@@ -110,15 +114,14 @@ def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
-def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
- int64_t Value = -(int)N->getZExtValue();
- return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+def t2_so_imm_neg : Operand<i32>, ImmLeaf<i32, [{
+ return Imm && ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
}], t2_so_imm_neg_XFORM> {
let ParserMatchClass = t2_so_imm_neg_asmoperand;
}
-/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
+def imm0_4095_asmoperand: ImmAsmOperand<0,4095> { let Name = "Imm0_4095"; }
def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 4096;
}]> {
@@ -139,7 +142,7 @@ def imm1_255_neg : PatLeaf<(i32 imm), [{
def imm0_255_not : PatLeaf<(i32 imm), [{
return (uint32_t)(~N->getZExtValue()) < 255;
-}], imm_comp_XFORM>;
+}], imm_not_XFORM>;
def lo5AllOne : PatLeaf<(i32 imm), [{
// Returns true if all low 5-bits are 1.
@@ -538,7 +541,8 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin,
class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
string opc, list<dag> pattern>
: T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> {
+ opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -556,7 +560,8 @@ class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc>
: T2I<(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -977,7 +982,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
PatFrag opnode> {
def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]>,
+ Sched<[WriteLd]> {
bits<4> Rt;
bits<17> addr;
let Inst{31-25} = 0b1111100;
@@ -993,7 +999,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
}
def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
+ Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1015,7 +1022,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
}
def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]>,
+ Sched<[WriteLd]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -1039,7 +1047,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
// from the PC.
def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]>,
+ Sched<[WriteLd]> {
let isReMaterializable = 1;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -1065,7 +1074,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
PatFrag opnode> {
def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0001;
let Inst{22-21} = opcod;
@@ -1082,7 +1092,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
}
def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -1102,7 +1113,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
}
def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -1121,28 +1133,10 @@ multiclass T2I_st<bits<2> opcod, string opc,
/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, ".w\t$Rd, $Rm$rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
-}
-
-// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
-class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+class T2I_ext_rrot_base<bits<3> opcod, dag iops, dag oops,
+ string opc, string oprs,
+ list<dag> pattern>
+ : T2TwoReg<iops, oops, IIC_iEXTr, opc, oprs, pattern> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1150,46 +1144,34 @@ class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
let Inst{19-16} = 0b1111; // Rn
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
-
-// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
-// supported yet.
-class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, "\t$Rd, $Rm$rot", []>,
- Requires<[IsThumb2, HasT2ExtractPack]> {
- bits<2> rot;
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
+ let Inst{5-4} = rot; // rotate
+}
+
+class T2I_ext_rrot<bits<3> opcod, string opc>
+ : T2I_ext_rrot_base<opcod,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rm, rot_imm:$rot),
+ opc, ".w\t$Rd, $Rm$rot", []>,
+ Requires<[IsThumb2]>,
+ Sched<[WriteALU, ReadALU]>;
+
+// UXTB16, SXTB16 - Requires HasDSP, does not need the .w qualifier.
+class T2I_ext_rrot_xtb16<bits<3> opcod, string opc>
+ : T2I_ext_rrot_base<opcod,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rm, rot_imm:$rot),
+ opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[HasDSP, IsThumb2]>,
+ Sched<[WriteALU, ReadALU]>;
/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+class T2I_exta_rrot<bits<3> opcod, string opc>
: T2ThreeReg<(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- bits<2> rot;
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
-
-class T2I_exta_rrot_np<bits<3> opcod, string opc>
- : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+ Requires<[HasDSP, IsThumb2]>,
+ Sched<[WriteALU, ReadALU]> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1279,7 +1261,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+ Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
@@ -1333,17 +1316,20 @@ let mayLoad = 1, hasSideEffects = 0 in {
def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
- "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
- "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
@@ -1353,41 +1339,45 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []>;
+ []>, Sched<[WriteLd]>;
def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []>;
+ []>, Sched<[WriteLd]>;
def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
: T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
- "\t$Rt, $addr", []> {
+ "\t$Rt, $addr", []>, Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1431,11 +1421,14 @@ class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
}
def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
// Store
defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>;
@@ -1448,7 +1441,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+ Sched<[WriteST]>;
// Indexed stores
@@ -1457,19 +1451,22 @@ def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
(ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"strh", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
"strb", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
} // mayStore = 1, hasSideEffects = 0
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -1480,7 +1477,8 @@ def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_store GPRnopc:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, addr_offset_none:$Rn,
@@ -1490,7 +1488,8 @@ def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, addr_offset_none:$Rn,
@@ -1500,7 +1499,8 @@ def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
// put the patterns on the instruction definitions directly as ISel wants
@@ -1513,17 +1513,20 @@ def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
}
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
@@ -1531,7 +1534,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
: T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
- "\t$Rt, $addr", []> {
+ "\t$Rt, $addr", []>, Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = 0; // not signed
@@ -1557,7 +1560,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
let mayLoad = 1 in
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []>,
+ Sched<[WriteLd]> {
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
@@ -1565,13 +1569,13 @@ let mayLoad = 1 in
def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
- "$addr.base = $wb", []>;
+ "$addr.base = $wb", []>, Sched<[WriteLd]>;
let mayStore = 1 in
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
- "$addr.base = $wb", []> {
+ "$addr.base = $wb", []>, Sched<[WriteST]> {
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
@@ -1580,12 +1584,13 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
t2am_imm8s4_offset:$imm),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
- "$addr.base = $wb", []>;
+ "$addr.base = $wb", []>, Sched<[WriteST]>;
class T2Istrrel<bits<2> bit54, dag oops, dag iops,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
- asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> {
+ asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]>,
+ Sched<[WriteST]> {
bits<4> Rt;
bits<4> addr;
@@ -1861,7 +1866,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
//
let hasSideEffects = 0 in
-def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr,
"mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -1870,11 +1875,11 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
-def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, zero_reg)>;
-def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, CPSR)>;
-def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, CPSR)>;
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
@@ -1926,10 +1931,11 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
def : InstAlias<"mov${p} $Rd, $imm",
(t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>,
- Requires<[IsThumb, HasV8MBaseline]>;
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>;
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
- (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
let Constraints = "$src = $Rd" in {
def t2MOVTi16 : T2I<(outs rGPR:$Rd),
@@ -1969,31 +1975,39 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
// Sign extenders
-def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
- UnOpFrag<(sext_inreg node:$Src, i8)>>;
-def t2SXTH : T2I_ext_rrot<0b000, "sxth",
- UnOpFrag<(sext_inreg node:$Src, i16)>>;
-def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb">;
+def t2SXTH : T2I_ext_rrot<0b000, "sxth">;
+def t2SXTB16 : T2I_ext_rrot_xtb16<0b010, "sxtb16">;
+
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab">;
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah">;
+def t2SXTAB16 : T2I_exta_rrot<0b010, "sxtab16">;
+
+def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i8),
+ (t2SXTB rGPR:$Rn, rot_imm:$rot)>;
+def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i16),
+ (t2SXTH rGPR:$Rn, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn,
+ (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn,
+ (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
- BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
- BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
// A simple right-shift can also be used in most cases (the exception is the
// SXTH operations with a rotate of 24: there the non-contiguous bits are
// relevant).
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(srl rGPR:$Rm, rot_imm:$rot), i8)),
(t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(srl rGPR:$Rm, imm8_or_16:$rot), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(rotr rGPR:$Rm, (i32 24)), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(or (srl rGPR:$Rm, (i32 24)),
(shl rGPR:$Rm, (i32 8))), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
@@ -2001,12 +2015,16 @@ def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
// Zero extenders
let AddedComplexity = 16 in {
-def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
- UnOpFrag<(and node:$Src, 0x000000FF)>>;
-def t2UXTH : T2I_ext_rrot<0b001, "uxth",
- UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
- UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb">;
+def t2UXTH : T2I_ext_rrot<0b001, "uxth">;
+def t2UXTB16 : T2I_ext_rrot_xtb16<0b011, "uxtb16">;
+
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x000000FF),
+ (t2UXTB rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x0000FFFF),
+ (t2UXTH rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF),
+ (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -2014,21 +2032,25 @@ def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
// eight bits of the source into the lower eight bits of the result.
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
// (t2UXTB16 rGPR:$Src, 3)>,
-// Requires<[HasT2ExtractPack, IsThumb2]>;
+// Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
(t2UXTB16 rGPR:$Src, 1)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
-def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab">;
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah">;
+def t2UXTAB16 : T2I_exta_rrot<0b011, "uxtab16">;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot),
+ 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot),
+ 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
0xFF)),
(t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
0xFFFF)),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
@@ -2060,6 +2082,19 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
}
+def : t2InstSubst<"adc${s}${p} $rd, $rn, $imm",
+ (t2SBCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm",
+ (t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+
+def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm",
+ (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"addw${p} $rd, $rn, $imm",
+ (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm",
+ (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"subw${p} $rd, $rn, $imm",
+ (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>;
@@ -2230,70 +2265,52 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
-class T2SatI<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+class T2SatI<dag iops, string opc, string asm>
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> {
bits<4> Rd;
bits<4> Rn;
bits<5> sat_imm;
- bits<7> sh;
+ bits<6> sh;
- let Inst{11-8} = Rd;
+ let Inst{31-24} = 0b11110011;
+ let Inst{21} = sh{5};
+ let Inst{20} = 0;
let Inst{19-16} = Rn;
- let Inst{4-0} = sat_imm;
- let Inst{21} = sh{5};
+ let Inst{15} = 0;
let Inst{14-12} = sh{4-2};
- let Inst{7-6} = sh{1-0};
+ let Inst{11-8} = Rd;
+ let Inst{7-6} = sh{1-0};
+ let Inst{5} = 0;
+ let Inst{4-0} = sat_imm;
}
-def t2SSAT: T2SatI<
- (outs rGPR:$Rd),
- (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
- NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
+def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ "ssat", "\t$Rd, $sat_imm, $Rn$sh">,
+ Requires<[IsThumb2]> {
+ let Inst{23-22} = 0b00;
let Inst{5} = 0;
}
-def t2SSAT16: T2SatI<
- (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
- "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
- let Inst{14-12} = 0b000; // imm3 = '000'
- let Inst{7-6} = 0b00; // imm2 = '00'
- let Inst{5-4} = 0b00;
+def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
+ "ssat16", "\t$Rd, $sat_imm, $Rn">,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{23-22} = 0b00;
+ let sh = 0b100000;
+ let Inst{4} = 0;
}
-def t2USAT: T2SatI<
- (outs rGPR:$Rd),
- (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
- NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
+def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ "usat", "\t$Rd, $sat_imm, $Rn$sh">,
+ Requires<[IsThumb2]> {
+ let Inst{23-22} = 0b10;
}
-def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
- NoItinerary,
- "usat16", "\t$Rd, $sat_imm, $Rn", []>,
+def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
+ "usat16", "\t$Rd, $sat_imm, $Rn">,
Requires<[IsThumb2, HasDSP]> {
- let Inst{31-22} = 0b1111001110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
- let Inst{14-12} = 0b000; // imm3 = '000'
- let Inst{7-6} = 0b00; // imm2 = '00'
- let Inst{5-4} = 0b00;
+ let Inst{23-22} = 0b10;
+ let sh = 0b100000;
+ let Inst{4} = 0;
}
def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
@@ -2305,11 +2322,18 @@ def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
// Shift and rotate Instructions.
//
-defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, shl>;
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm1_31, shl>;
defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, srl>;
defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, sra>;
defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>;
+// LSL #0 is actually MOV, and has slightly different permitted registers to
+// LSL with non-zero shift
+def : t2InstAlias<"lsl${s}${p} $Rd, $Rm, #0",
+ (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"lsl${s}${p}.w $Rd, $Rm, #0",
+ (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
(t2RORrr rGPR:$lhs, rGPR:$rhs)>;
@@ -2547,7 +2571,8 @@ def : T2Pat<(t2_so_imm_not:$src),
let isCommutable = 1 in
def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"mul", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2558,7 +2583,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, UseMulOps]> {
+ Requires<[IsThumb2, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2575,8 +2601,12 @@ def t2MLS: T2FourRegMLA<0b0001, "mls",
// Extra precision multiplies with low / high results
let hasSideEffects = 0 in {
let isCommutable = 1 in {
-def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>;
-def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>;
+def t2SMULL : T2MulLong<0b000, 0b0000, "smull",
+ [(set rGPR:$RdLo, rGPR:$RdHi,
+ (smullohi rGPR:$Rn, rGPR:$Rm))]>;
+def t2UMULL : T2MulLong<0b010, 0b0000, "umull",
+ [(set rGPR:$RdLo, rGPR:$RdHi,
+ (umullohi rGPR:$Rn, rGPR:$Rm))]>;
} // isCommutable
// Multiply + accumulate
@@ -2592,7 +2622,8 @@ class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern>
: T2ThreeReg<(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
opc, "\t$Rd, $Rn, $Rm", pattern>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2607,7 +2638,8 @@ class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc,
list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2624,7 +2656,8 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
list<dag> pattern>
: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc,
"\t$Rd, $Rn, $Rm", pattern>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2645,8 +2678,10 @@ def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
[(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16))))]>;
-def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>;
-def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>;
+def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
+ [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
+def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
+ [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
(t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
@@ -2659,7 +2694,8 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2680,8 +2716,10 @@ def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
[(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>;
-def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>;
-def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>;
+def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
+def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwt rGPR:$Rn, rGPR:$Rm)))]>;
def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
@@ -2692,25 +2730,32 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra,
(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
- : T2FourReg_mac<1, op22_20, op7_4,
- (outs rGPR:$Ra, rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-
// Halfword multiple accumulate long: SMLAL<x><y>
-def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>;
-def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>;
-def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>;
-def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>;
+def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">,
+ Requires<[IsThumb2, HasDSP]>;
+
+def : Thumb2DSPPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALBB $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALBT $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALTB $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALTT $Rn, $Rm, $RLo, $RHi)>;
class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
: T2ThreeReg_mac<0, op22_20, op7_4,
(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{15-12} = 0b1111;
}
@@ -2737,7 +2782,8 @@ class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc>
(outs rGPR:$Ra, rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">;
def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">;
@@ -2751,7 +2797,8 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
+ Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
let Inst{20} = 0b1;
@@ -2762,7 +2809,8 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
+ Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
let Inst{20} = 0b1;
@@ -2819,7 +2867,7 @@ def t2PKHBT : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
(and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]>,
+ Requires<[HasDSP, IsThumb2]>,
Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2835,10 +2883,10 @@ def t2PKHBT : T2ThreeReg<
// Alternate cases for PKHBT where identities eliminate some nodes.
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
(t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
(t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
@@ -2848,7 +2896,7 @@ def t2PKHTB : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
(and (sra rGPR:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]>,
+ Requires<[HasDSP, IsThumb2]>,
Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2867,14 +2915,14 @@ def t2PKHTB : T2ThreeReg<
// pkhtb src1, src2, asr (17..31).
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
(and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
//===----------------------------------------------------------------------===//
// CRC32 Instructions
@@ -4216,13 +4264,13 @@ def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
Requires<[IsThumb2]>;
def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
(t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
}
def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
@@ -4231,10 +4279,10 @@ def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
Requires<[IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
(t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// Atomic load/store patterns
def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
@@ -4325,26 +4373,26 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
pred:$p, cc_out:$s)>;
// add w/ negative immediates is just a sub.
-def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${p} $Rd, $Rn, $imm",
(t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+def : t2InstSubst<"add${s}${p} $Rdn, $imm",
(t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"add${p} $Rdn, $imm",
+def : t2InstSubst<"add${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"addw${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"addw${p} $Rd, $Rn, $imm",
(t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+def : t2InstSubst<"add${s}${p}.w $Rdn, $imm",
(t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"addw${p} $Rdn, $imm",
+def : t2InstSubst<"addw${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
@@ -4431,10 +4479,10 @@ def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
// input operands swapped when the shift amount is zero (i.e., unspecified).
def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
(t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
(t2PKHBT rGPR:$Rd, rGPR:$Rm, rGPR:$Rn, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// PUSH/POP aliases for STM/LDM
def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
@@ -4513,16 +4561,16 @@ def : t2InstAlias<"strh${p} $Rt, $addr",
// Extend instruction optional rotate operand.
def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
(t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
(t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
(t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtb16${p} $Rd, $Rm",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm",
(t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
@@ -4535,16 +4583,16 @@ def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
(t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
(t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
(t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtb16${p} $Rd, $Rm",
(t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"uxtb${p} $Rd, $Rm",
(t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
@@ -4560,7 +4608,7 @@ def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
(t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"uxtb16${p} $Rd, $Rm$rot",
(t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
(t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
@@ -4568,41 +4616,41 @@ def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
(t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"sxtb16${p} $Rd, $Rm$rot",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
-def : t2InstAlias<"mov${p} $Rd, $imm",
+def : t2InstSubst<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
-def : t2InstAlias<"mvn${p} $Rd, $imm",
- (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstSubst<"mvn${s}${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
// Same for AND <--> BIC
-def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"bic${s}${p} $Rd, $Rn, $imm",
(t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+def : t2InstSubst<"bic${s}${p} $Rdn, $imm",
(t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"and${s}${p} $Rd, $Rn, $imm",
(t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+def : t2InstSubst<"and${s}${p} $Rdn, $imm",
(t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, t2_so_imm_neg" -> sub
-def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"add${s}${p} $Rd, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via t2_so_imm_neg
-def : t2InstAlias<"cmp${p} $Rd, $imm",
+def : t2InstSubst<"cmp${p} $Rd, $imm",
(t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
-def : t2InstAlias<"cmn${p} $Rd, $imm",
+def : t2InstSubst<"cmn${p} $Rd, $imm",
(t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
@@ -4616,6 +4664,8 @@ def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
// these, unfortunately.
+// FIXME: LSL #0 in the shift should allow SP to be used as either the
+// source or destination (but not both).
def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
(ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
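// A minimal sketch of the effect of the t2InstSubst records above, assuming
// the substituted encoding is chosen when the written immediate has no
// t2_so_imm encoding but its negation or complement does:
//
//   and r0, r1, #0xffffff00   ->  encoded as t2BICri  ("bic r0, r1, #0xff")
//   cmp r0, #-1               ->  encoded as t2CMNri  ("cmn r0, #1")
//   add r0, r1, #-4           ->  encoded as t2SUBri  ("sub r0, r1, #4")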
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e99048645685..0f225156d4ca 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -11,14 +11,17 @@
//
//===----------------------------------------------------------------------===//
-def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
+def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, f64>]>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -336,13 +339,15 @@ let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VADDD : ADbI<0b11100, 0b11, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -352,19 +357,22 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDH : AHbI<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -374,37 +382,43 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPDIV64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -414,17 +428,20 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULH : AHbI<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -433,7 +450,8 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
@@ -501,12 +519,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -517,17 +535,15 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
[]>;
-
-// FIXME: Verify encoding after integrated assembler is working.
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
- [/* For disassembly only; pattern left blank */]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -566,7 +582,7 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -574,7 +590,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd)]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -591,11 +607,10 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
let Inst{5} = 0;
}
-// FIXME: Verify encoding after integrated assembler is working.
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -603,7 +618,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -624,7 +639,8 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
- [(set DPR:$Dd, (fpextend SPR:$Sm))]> {
+ [(set DPR:$Dd, (fpextend SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Dd;
bits<5> Sm;
@@ -641,7 +657,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (fpround DPR:$Dm))]> {
+ [(set SPR:$Sd, (fpround DPR:$Dm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -663,31 +680,35 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half, single and double-precision. For disassembly only.
-// FIXME: Verify encoding after integrated assembler is working.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sm;
@@ -946,12 +967,14 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
- [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPSQRT64]>;
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
- [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
+ Sched<[WriteFPSQRT32]>;
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
@@ -987,7 +1010,8 @@ def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0,
def VMOVRS : AVConv2I<0b11100001, 0b1010,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
- [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1010,7 +1034,8 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
- Requires<[HasVFP2, UseVMOVSR]> {
+ Requires<[HasVFP2, UseVMOVSR]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1032,7 +1057,8 @@ let hasSideEffects = 0 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
- [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1059,7 +1085,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
(outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
bits<5> src1;
bits<4> Rt;
bits<4> Rt2;
@@ -1085,7 +1112,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
- [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1128,7 +1156,8 @@ let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> dst1;
bits<4> src1;
@@ -1154,7 +1183,8 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1173,7 +1203,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1254,7 +1285,8 @@ class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
@@ -1269,7 +1301,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1286,14 +1319,16 @@ def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1308,7 +1343,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1325,7 +1361,8 @@ def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1390,7 +1427,8 @@ class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1405,7 +1443,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1423,14 +1462,16 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1445,7 +1486,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1463,52 +1505,58 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
-// FIXME: Verify encoding after integrated assembler is working.
def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtru (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
}
@@ -1528,8 +1576,7 @@ let Constraints = "$a = $dst" in {
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
@@ -1540,8 +1587,7 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
@@ -1553,26 +1599,31 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1604,45 +1655,54 @@ def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
// Fixed-Point to FP:
def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1650,7 +1710,8 @@ def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1658,7 +1719,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1666,7 +1728,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1674,19 +1737,23 @@ def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
} // End of 'let Constraints = "$a = $dst" in'
@@ -1700,7 +1767,8 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1708,7 +1776,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1734,7 +1803,8 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1742,7 +1812,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1768,7 +1839,8 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1776,7 +1848,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1802,14 +1875,16 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1838,7 +1913,8 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1846,7 +1922,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1856,7 +1933,8 @@ def VFMAH : AHbI<0b11101, 0b10, 0, 0,
IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 2bdbe4fca3de..8d224d6a70fa 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -54,9 +54,21 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
DstSize <= SrcSize)) &&
"Copy with different width?!");
- assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank");
+ assert((RegBank->getID() == ARM::GPRRegBankID ||
+ RegBank->getID() == ARM::FPRRegBankID) &&
+ "Unsupported reg bank");
+
const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ if (RegBank->getID() == ARM::FPRRegBankID) {
+ if (DstSize == 32)
+ RC = &ARM::SPRRegClass;
+ else if (DstSize == 64)
+ RC = &ARM::DPRRegClass;
+ else
+ llvm_unreachable("Unsupported destination size");
+ }
+
// No need to constrain SrcReg. It will get constrained when
// we hit another of its uses or its defs.
// Copies do not have constraints.
@@ -68,6 +80,146 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return true;
}
+static bool selectFAdd(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select fp add without vfp");
+
+ LLT Ty = MRI.getType(MIB->getOperand(0).getReg());
+ unsigned ValSize = Ty.getSizeInBits();
+
+ if (ValSize == 32) {
+ if (TII.getSubtarget().useNEONForSinglePrecisionFP())
+ return false;
+ MIB->setDesc(TII.get(ARM::VADDS));
+ } else {
+ assert(ValSize == 64 && "Unsupported size for floating point value");
+ if (TII.getSubtarget().isFPOnlySP())
+ return false;
+ MIB->setDesc(TII.get(ARM::VADDD));
+ }
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
+
+static bool selectSequence(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP");
+
+ // We only support G_SEQUENCE as a way to stick together two scalar GPRs
+ // into one DPR.
+ unsigned VReg0 = MIB->getOperand(0).getReg();
+ (void)VReg0;
+ assert(MRI.getType(VReg0).getSizeInBits() == 64 &&
+ RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+ "Unsupported operand for G_SEQUENCE");
+ unsigned VReg1 = MIB->getOperand(1).getReg();
+ (void)VReg1;
+ assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_SEQUENCE");
+ unsigned VReg2 = MIB->getOperand(3).getReg();
+ (void)VReg2;
+ assert(MRI.getType(VReg2).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_SEQUENCE");
+
+ // Remove the operands corresponding to the offsets.
+ MIB->RemoveOperand(4);
+ MIB->RemoveOperand(2);
+
+ MIB->setDesc(TII.get(ARM::VMOVDRR));
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
+
+static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP");
+
+ // We only support G_EXTRACT as a way to break up one DPR into two GPRs.
+ unsigned VReg0 = MIB->getOperand(0).getReg();
+ (void)VReg0;
+ assert(MRI.getType(VReg0).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_EXTRACT");
+ unsigned VReg1 = MIB->getOperand(1).getReg();
+ (void)VReg1;
+ assert(MRI.getType(VReg1).getSizeInBits() == 64 &&
+ RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+ "Unsupported operand for G_EXTRACT");
+ assert(MIB->getOperand(2).getImm() % 32 == 0 &&
+ "Unsupported operand for G_EXTRACT");
+
+ // Convert the bit offset into the 32-bit lane index expected by VGETLNi32.
+ MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32);
+
+ MIB->setDesc(TII.get(ARM::VGETLNi32));
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
+
+/// Select the opcode for simple extensions (that translate to a single SXT/UXT
+/// instruction). Extension operations more complicated than that should not
+/// invoke this. Returns the original opcode if it doesn't know how to select a
+/// better one.
+static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) {
+ using namespace TargetOpcode;
+
+ if (Size != 8 && Size != 16)
+ return Opc;
+
+ if (Opc == G_SEXT)
+ return Size == 8 ? ARM::SXTB : ARM::SXTH;
+
+ if (Opc == G_ZEXT)
+ return Size == 8 ? ARM::UXTB : ARM::UXTH;
+
+ return Opc;
+}
+
+/// Select the opcode for simple loads and stores. For types smaller than 32
+/// bits, the value will be zero extended. Returns the original opcode if it
+/// doesn't know how to select a better one.
+static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
+ unsigned Size) {
+ bool isStore = Opc == TargetOpcode::G_STORE;
+
+ if (RegBank == ARM::GPRRegBankID) {
+ switch (Size) {
+ case 1:
+ case 8:
+ return isStore ? ARM::STRBi12 : ARM::LDRBi12;
+ case 16:
+ return isStore ? ARM::STRH : ARM::LDRH;
+ case 32:
+ return isStore ? ARM::STRi12 : ARM::LDRi12;
+ default:
+ return Opc;
+ }
+ }
+
+ if (RegBank == ARM::FPRRegBankID) {
+ switch (Size) {
+ case 32:
+ return isStore ? ARM::VSTRS : ARM::VLDRS;
+ case 64:
+ return isStore ? ARM::VSTRD : ARM::VLDRD;
+ default:
+ return Opc;
+ }
+ }
+
+ return Opc;
+}
+
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -84,23 +236,129 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
}
MachineInstrBuilder MIB{MF, I};
+ bool isSExt = false;
using namespace TargetOpcode;
switch (I.getOpcode()) {
+ case G_SEXT:
+ isSExt = true;
+ LLVM_FALLTHROUGH;
+ case G_ZEXT: {
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ // FIXME: Smaller destination sizes coming soon!
+ if (DstTy.getSizeInBits() != 32) {
+ DEBUG(dbgs() << "Unsupported destination size for extension");
+ return false;
+ }
+
+ LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned SrcSize = SrcTy.getSizeInBits();
+ switch (SrcSize) {
+ case 1: {
+ // ZExt boils down to & 0x1; for SExt we also subtract that from 0
+ I.setDesc(TII.get(ARM::ANDri));
+ MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());
+
+ if (isSExt) {
+ unsigned SExtResult = I.getOperand(0).getReg();
+
+ // Use a new virtual register for the result of the AND
+ unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ I.getOperand(0).setReg(AndResult);
+
+ auto InsertBefore = std::next(I.getIterator());
+ auto SubI =
+ BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
+ .addDef(SExtResult)
+ .addUse(AndResult)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
+ return false;
+ }
+ break;
+ }
+ case 8:
+ case 16: {
+ unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
+ if (NewOpc == I.getOpcode())
+ return false;
+ I.setDesc(TII.get(NewOpc));
+ MIB.addImm(0).add(predOps(ARMCC::AL));
+ break;
+ }
+ default:
+ DEBUG(dbgs() << "Unsupported source size for extension");
+ return false;
+ }
+ break;
+ }
case G_ADD:
+ case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
- AddDefaultCC(AddDefaultPred(MIB));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ case G_FADD:
+ if (!selectFAdd(MIB, TII, MRI))
+ return false;
break;
case G_FRAME_INDEX:
// Add 0 to the given frame index and hope it will eventually be folded into
// the user(s).
I.setDesc(TII.get(ARM::ADDri));
- AddDefaultCC(AddDefaultPred(MIB.addImm(0)));
+ MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
break;
- case G_LOAD:
- I.setDesc(TII.get(ARM::LDRi12));
- AddDefaultPred(MIB.addImm(0));
+ case G_CONSTANT: {
+ unsigned Reg = I.getOperand(0).getReg();
+ if (MRI.getType(Reg).getSizeInBits() != 32)
+ return false;
+
+ assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Expected constant to live in a GPR");
+ I.setDesc(TII.get(ARM::MOVi));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ }
+ case G_STORE:
+ case G_LOAD: {
+ const auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
+ unsigned Reg = I.getOperand(0).getReg();
+ unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();
+
+ LLT ValTy = MRI.getType(Reg);
+ const auto ValSize = ValTy.getSizeInBits();
+
+ assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) &&
+ "Don't know how to load/store 64-bit value without VFP");
+
+ const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
+ if (NewOpc == G_LOAD || NewOpc == G_STORE)
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+
+ if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
+ // LDRH has a funny addressing mode (there's already a FIXME for it).
+ MIB.addReg(0);
+ MIB.addImm(0).add(predOps(ARMCC::AL));
+ break;
+ }
+ case G_SEQUENCE: {
+ if (!selectSequence(MIB, TII, MRI, TRI, RBI))
+ return false;
break;
+ }
+ case G_EXTRACT: {
+ if (!selectExtract(MIB, TII, MRI, TRI, RBI))
+ return false;
+ break;
+ }
default:
return false;
}
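The 1-bit G_ZEXT/G_SEXT handling above leans on the identity spelled out in its comment: zero-extending an i1 is a mask with 1, and sign-extending it is that mask subtracted from zero, which yields all-ones for true. A minimal standalone sketch of that arithmetic (plain C++, not part of this patch or of the ARM backend):

#include <cassert>
#include <cstdint>

// zext(i1 x) -> x & 1           (the ANDri #1 emitted above)
static uint32_t zext1(uint32_t x) { return x & 1u; }

// sext(i1 x) -> 0 - (x & 1)     (ANDri #1 followed by RSBri #0)
static uint32_t sext1(uint32_t x) { return 0u - (x & 1u); }

int main() {
  assert(zext1(1) == 1u && zext1(0) == 0u);
  assert(sext1(1) == 0xFFFFFFFFu && sext1(0) == 0u);
  return 0;
}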
diff --git a/lib/Target/ARM/ARMInstructionSelector.h b/lib/Target/ARM/ARMInstructionSelector.h
index 5072cdd60ce4..530141d92c2c 100644
--- a/lib/Target/ARM/ARMInstructionSelector.h
+++ b/lib/Target/ARM/ARMInstructionSelector.h
@@ -1,4 +1,4 @@
-//===- ARMInstructionSelector ------------------------------------*- C++ -*-==//
+//===- ARMInstructionSelector -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,8 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
/// \file
/// This file declares the targeting of the InstructionSelector class for ARM.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
@@ -16,9 +18,9 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
+
class ARMBaseInstrInfo;
class ARMBaseRegisterInfo;
-class ARMBaseTargetMachine;
class ARMRegisterBankInfo;
class ARMSubtarget;
@@ -27,7 +29,7 @@ public:
ARMInstructionSelector(const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI);
- virtual bool select(MachineInstr &I) const override;
+ bool select(MachineInstr &I) const override;
private:
const ARMBaseInstrInfo &TII;
@@ -35,5 +37,6 @@ private:
const ARMRegisterBankInfo &RBI;
};
-} // End llvm namespace.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 255ea4bc7198..994bbd673dd8 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ARMLegalizerInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
@@ -23,22 +24,53 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
-ARMLegalizerInfo::ARMLegalizerInfo() {
+ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 32);
+ const LLT s1 = LLT::scalar(1);
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
setAction({G_FRAME_INDEX, p0}, Legal);
- setAction({G_LOAD, s32}, Legal);
- setAction({G_LOAD, 1, p0}, Legal);
+ for (unsigned Op : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s1, s8, s16, s32, p0})
+ setAction({Op, Ty}, Legal);
+ setAction({Op, 1, p0}, Legal);
+ }
- for (auto Ty : {s8, s16, s32})
+ for (auto Ty : {s1, s8, s16, s32})
setAction({G_ADD, Ty}, Legal);
+ for (unsigned Op : {G_SEXT, G_ZEXT}) {
+ setAction({Op, s32}, Legal);
+ for (auto Ty : {s1, s8, s16})
+ setAction({Op, 1, Ty}, Legal);
+ }
+
+ setAction({G_GEP, p0}, Legal);
+ setAction({G_GEP, 1, s32}, Legal);
+
+ setAction({G_CONSTANT, s32}, Legal);
+
+ if (!ST.useSoftFloat() && ST.hasVFP2()) {
+ setAction({G_FADD, s32}, Legal);
+ setAction({G_FADD, s64}, Legal);
+
+ setAction({G_LOAD, s64}, Legal);
+ setAction({G_STORE, s64}, Legal);
+ } else {
+ for (auto Ty : {s32, s64})
+ setAction({G_FADD, Ty}, Libcall);
+ }
+
+ for (unsigned Op : {G_FREM, G_FPOW})
+ for (auto Ty : {s32, s64})
+ setAction({Op, Ty}, Libcall);
+
computeTables();
}
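The net effect of the legalizer changes above for floating-point addition: on a hard-float VFP2 subtarget, G_FADD stays legal for both 32- and 64-bit values (and 64-bit loads/stores become legal so those values can be moved); otherwise it is turned into a library call. A standalone sketch of that decision, with the usual soft-float helper names (__addsf3/__adddf3) mentioned only as typical examples, not taken from this patch:

#include <cstdio>

enum class Action { Legal, Libcall };

// Mirrors the policy set up in ARMLegalizerInfo above: hard-float + VFP2
// keeps G_FADD legal; everything else falls back to a runtime library call.
static Action fAddAction(bool UseSoftFloat, bool HasVFP2) {
  return (!UseSoftFloat && HasVFP2) ? Action::Legal : Action::Libcall;
}

int main() {
  std::printf("hard-float, VFP2 : %s\n",
              fAddAction(false, true) == Action::Legal ? "Legal" : "Libcall");
  std::printf("soft-float       : %s\n",
              fAddAction(true, true) == Action::Legal ? "Legal" : "Libcall");
  return 0;
}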
diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h
index ca3eea81271b..0b8a608a6bde 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/lib/Target/ARM/ARMLegalizerInfo.h
@@ -18,12 +18,12 @@
namespace llvm {
-class LLVMContext;
+class ARMSubtarget;
/// This class provides the information for the target register banks.
class ARMLegalizerInfo : public LegalizerInfo {
public:
- ARMLegalizerInfo();
+ ARMLegalizerInfo(const ARMSubtarget &ST);
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 48ab491b5be9..72fcf7cd6a4f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -517,8 +517,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
+ .add(t1CondCodeOp(true))
+ .addReg(Base)
+ .addImm(WordOffset * 4)
+ .addImm(Pred)
+ .addReg(PredReg);
return;
}
@@ -534,9 +538,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
// information and *always* have to reset at the end of a block.
// See PR21029.
if (MBBI != MBB.end()) --MBBI;
- AddDefaultT1CC(
- BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
+ .add(t1CondCodeOp(true))
+ .addReg(Base)
+ .addImm(WordOffset * 4)
+ .addImm(Pred)
+ .addReg(PredReg);
}
}
@@ -602,13 +609,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback.
// It's also not possible to merge an STR of the base register in Thumb1.
- if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
+ if (isThumb1 && ContainsReg(Regs, Base)) {
assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
- if (Opcode == ARM::tLDRi) {
+ if (Opcode == ARM::tLDRi)
Writeback = false;
- } else if (Opcode == ARM::tSTRi) {
+ else if (Opcode == ARM::tSTRi)
return nullptr;
- }
}
ARM_AM::AMSubMode Mode = ARM_AM::ia;
@@ -700,8 +706,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
.addReg(Base, getKillRegState(KillOldBase));
} else
BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
- .addReg(Base, getKillRegState(KillOldBase))
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .add(predOps(Pred, PredReg));
// The following ADDS/SUBS becomes an update.
Base = NewBase;
@@ -710,17 +716,21 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
if (BaseOpc == ARM::tADDrSPi) {
assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset / 4)
+ .add(predOps(Pred, PredReg));
} else
- AddDefaultT1CC(
- BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .add(t1CondCodeOp(true))
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
} else {
BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
- .addImm(Pred).addReg(PredReg).addReg(0);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp());
}
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
@@ -1259,7 +1269,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Transfer the rest of operands.
for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.addOperand(MI->getOperand(OpNum));
+ MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -1392,14 +1402,19 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg));
}
} else {
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
}
} else {
MachineOperand &MO = MI->getOperand(0);
@@ -1410,13 +1425,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg));
} else {
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
}
}
MBB.erase(MBBI);
@@ -1462,12 +1482,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
DebugLoc DL = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
- MIB.addOperand(Reg0Op).addOperand(Reg1Op)
- .addReg(BaseOp.getReg(), RegState::Define);
+ MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
} else {
assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
- MIB.addReg(BaseOp.getReg(), RegState::Define)
- .addOperand(Reg0Op).addOperand(Reg1Op);
+ MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
}
MIB.addReg(BaseOp.getReg(), RegState::Kill)
.addImm(Offset).addImm(Pred).addReg(PredReg);
@@ -1477,7 +1495,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MBB.erase(MBBI);
@@ -1891,8 +1909,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
for (auto Use : Prev->uses())
if (Use.isKill()) {
- AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
- .addReg(Use.getReg(), RegState::Kill))
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill)
+ .add(predOps(ARMCC::AL))
.copyImplicitOps(*MBBI);
MBB.erase(MBBI);
MBB.erase(Prev);
@@ -1942,6 +1961,7 @@ namespace {
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+ AliasAnalysis *AA;
const DataLayout *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -1955,6 +1975,11 @@ namespace {
return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
private:
bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg,
@@ -1984,6 +2009,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
MF = &Fn;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool Modified = false;
for (MachineBasicBlock &MFI : Fn)
@@ -1997,28 +2023,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator E,
SmallPtrSetImpl<MachineInstr*> &MemOps,
SmallSet<unsigned, 4> &MemRegs,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ AliasAnalysis *AA) {
// Are there stores / loads / calls between them?
- // FIXME: This is overly conservative. We should make use of alias information
- // some day.
SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && I->mayStore())
- return false;
- if (!isLd) {
- if (I->mayLoad())
- return false;
- // It's not safe to move the first 'str' down.
- // str r1, [r0]
- // strh r5, [r0]
- // str r4, [r0, #+4]
- if (I->mayStore())
- return false;
- }
+ if (I->mayStore() || (!isLd && I->mayLoad()))
+ for (MachineInstr *MemOp : MemOps)
+ if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
+ return false;
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
MachineOperand &MO = I->getOperand(j);
if (!MO.isReg())
@@ -2142,33 +2159,40 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
unsigned LastBytes = 0;
unsigned NumMove = 0;
for (int i = Ops.size() - 1; i >= 0; --i) {
+ // Make sure each operation has the same kind.
MachineInstr *Op = Ops[i];
- unsigned Loc = MI2LocMap[Op];
- if (Loc <= FirstLoc) {
- FirstLoc = Loc;
- FirstOp = Op;
- }
- if (Loc >= LastLoc) {
- LastLoc = Loc;
- LastOp = Op;
- }
-
unsigned LSMOpcode
= getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
if (LastOpcode && LSMOpcode != LastOpcode)
break;
+ // Check that we have a continuous set of offsets.
int Offset = getMemoryOpOffset(*Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
break;
}
+
+ // Don't try to reschedule too many instructions.
+ if (NumMove == 8) // FIXME: Tune this limit.
+ break;
+
+ // Found a mergable instruction; save information about it.
+ ++NumMove;
LastOffset = Offset;
LastBytes = Bytes;
LastOpcode = LSMOpcode;
- if (++NumMove == 8) // FIXME: Tune this limit.
- break;
+
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
}
if (NumMove <= 1)
@@ -2176,7 +2200,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
else {
SmallPtrSet<MachineInstr*, 4> MemOps;
SmallSet<unsigned, 4> MemRegs;
- for (int i = NumMove-1; i >= 0; --i) {
+ for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
MemOps.insert(Ops[i]);
MemRegs.insert(Ops[i]->getOperand(0).getReg());
}
@@ -2186,7 +2210,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
if (DoMove)
DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
- MemOps, MemRegs, TRI);
+ MemOps, MemRegs, TRI, AA);
if (!DoMove) {
for (unsigned i = 0; i != NumMove; ++i)
Ops.pop_back();
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 07044b9697b6..0fd98268723a 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -14,23 +14,36 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
-using namespace llvm;
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+using namespace llvm;
MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
const MCSymbol *Symbol) {
+ MCSymbolRefExpr::VariantKind SymbolVariant = MCSymbolRefExpr::VK_None;
+ if (MO.getTargetFlags() & ARMII::MO_SBREL)
+ SymbolVariant = MCSymbolRefExpr::VK_ARM_SBREL;
+
const MCExpr *Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
switch (MO.getTargetFlags() & ARMII::MO_OPTION_MASK) {
default:
llvm_unreachable("Unknown target flag on symbol operand");
@@ -38,12 +51,12 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
break;
case ARMII::MO_LO16:
Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
Expr = ARMMCExpr::createLower16(Expr, OutContext);
break;
case ARMII::MO_HI16:
Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
Expr = ARMMCExpr::createUpper16(Expr, OutContext);
break;
}
@@ -75,11 +88,10 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), OutContext));
break;
- case MachineOperand::MO_GlobalAddress: {
+ case MachineOperand::MO_GlobalAddress:
MCOp = GetSymbolRef(MO,
GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags()));
break;
- }
case MachineOperand::MO_ExternalSymbol:
MCOp = GetSymbolRef(MO,
GetExternalSymbolSymbol(MO.getSymbolName()));
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 50d8f0941460..e25d36b57616 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
using namespace llvm;
@@ -15,10 +16,4 @@ void ARMFunctionInfo::anchor() {}
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
- StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0),
- HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0),
- IsSplitCSR(false), PromotedGlobalsIncrease(0) {}
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()) {}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 8c485e89bf54..816116772995 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -14,11 +14,11 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
-#include "ARMSubtarget.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <utility>
namespace llvm {
@@ -29,42 +29,42 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// isThumb - True if this function is compiled under Thumb mode.
/// Used to initialize Align, so it must precede it.
- bool isThumb;
+ bool isThumb = false;
/// hasThumb2 - True if the target architecture supports Thumb2. Do not use
/// to determine if function is compiled under Thumb mode, for that use
/// 'isThumb'.
- bool hasThumb2;
+ bool hasThumb2 = false;
/// StByValParamsPadding - For a parameter that is split between
/// GPRs and memory: while recovering the GPR part, when
/// StackAlignment > 4 and GPRs-part-size mod StackAlignment != 0,
/// we need to insert a gap before the parameter start address. This allows
/// the GPR part to be "attached" to the part that was passed via the stack.
- unsigned StByValParamsPadding;
+ unsigned StByValParamsPadding = 0;
/// ArgRegsSaveSize - Size of the register save area for vararg functions.
///
- unsigned ArgRegsSaveSize;
+ unsigned ArgRegsSaveSize = 0;
/// ReturnRegsCount - Number of registers used up in the return.
- unsigned ReturnRegsCount;
+ unsigned ReturnRegsCount = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
- bool HasStackFrame;
+ bool HasStackFrame = false;
/// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
/// emitPrologue.
- bool RestoreSPFromFP;
+ bool RestoreSPFromFP = false;
/// LRSpilledForFarJump - True if the LR register has been spilled to
/// enable a far jump.
- bool LRSpilledForFarJump;
+ bool LRSpilledForFarJump = false;
/// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
/// spill stack offset.
- unsigned FramePtrSpillOffset;
+ unsigned FramePtrSpillOffset = 0;
/// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
/// register spills areas. For Mac OS X:
@@ -77,16 +77,16 @@ class ARMFunctionInfo : public MachineFunctionInfo {
///
/// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3.
/// Some may be spilled after the stack has been realigned.
- unsigned GPRCS1Offset;
- unsigned GPRCS2Offset;
- unsigned DPRCSOffset;
+ unsigned GPRCS1Offset = 0;
+ unsigned GPRCS2Offset = 0;
+ unsigned DPRCSOffset = 0;
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
/// areas.
- unsigned GPRCS1Size;
- unsigned GPRCS2Size;
- unsigned DPRCSAlignGapSize;
- unsigned DPRCSSize;
+ unsigned GPRCS1Size = 0;
+ unsigned GPRCS2Size = 0;
+ unsigned DPRCSAlignGapSize = 0;
+ unsigned DPRCSSize = 0;
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
/// the aligned portion of the stack frame. This is always a contiguous
@@ -95,15 +95,15 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// We do not keep track of the frame indices used for these registers - they
/// behave like any other frame index in the aligned stack frame. These
/// registers also aren't included in DPRCSSize above.
- unsigned NumAlignedDPRCS2Regs;
+ unsigned NumAlignedDPRCS2Regs = 0;
- unsigned PICLabelUId;
+ unsigned PICLabelUId = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// HasITBlocks - True if IT blocks have been inserted.
- bool HasITBlocks;
+ bool HasITBlocks = false;
/// CPEClones - Track constant pool entries clones created by Constant Island
/// pass.
@@ -111,7 +111,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// ArgumentStackSize - number of bytes of stack consumed by the arguments
/// being passed on the stack.
- unsigned ArgumentStackSize;
+ unsigned ArgumentStackSize = 0;
/// CoalescedWeights - mapping of basic blocks to the rolling counter of
/// coalesced weights.
@@ -119,26 +119,16 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
/// Globals that have had their storage promoted into the constant pool.
SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
/// The amount the literal pool has been increased by due to promoted globals.
- int PromotedGlobalsIncrease;
+ int PromotedGlobalsIncrease = 0;
public:
- ARMFunctionInfo() :
- isThumb(false),
- hasThumb2(false),
- ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false),
- RestoreSPFromFP(false),
- LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
- NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false),
- PromotedGlobalsIncrease(0) {}
+ ARMFunctionInfo() = default;
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -250,6 +240,7 @@ public:
PromotedGlobalsIncrease = Sz;
}
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
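The ARMMachineFunctionInfo.h hunks above replace the long constructor mem-initializer list with in-class default member initializers and a defaulted constructor. A minimal standalone sketch (not the LLVM class; member names are illustrative) of why the two forms are equivalent:

// Standalone sketch: a defaulted constructor is enough once every member
// carries an in-class initializer.
#include <cassert>

struct BeforeStyle {
  bool HasStackFrame;
  unsigned ArgRegsSaveSize;
  // Every member must be repeated in the mem-initializer list.
  BeforeStyle() : HasStackFrame(false), ArgRegsSaveSize(0) {}
};

struct AfterStyle {
  bool HasStackFrame = false;   // in-class default member initializer
  unsigned ArgRegsSaveSize = 0; // applies to every constructor
  AfterStyle() = default;       // nothing left to list explicitly
};

int main() {
  AfterStyle A;
  assert(!A.HasStackFrame && A.ArgRegsSaveSize == 0);
  return 0;
}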
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 324087d670b5..08f3da738868 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -13,11 +13,15 @@
#include "ARMRegisterBankInfo.h"
#include "ARMInstrInfo.h" // For the register classes
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_TARGET_REGBANK_IMPL
+#include "ARMGenRegisterBank.inc"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -29,44 +33,109 @@ using namespace llvm;
// into an ARMGenRegisterBankInfo.def (similar to AArch64).
namespace llvm {
namespace ARM {
-const uint32_t GPRCoverageData[] = {
- // Classes 0-31
- (1u << ARM::GPRRegClassID) | (1u << ARM::GPRwithAPSRRegClassID) |
- (1u << ARM::GPRnopcRegClassID) | (1u << ARM::rGPRRegClassID) |
- (1u << ARM::hGPRRegClassID) | (1u << ARM::tGPRRegClassID) |
- (1u << ARM::GPRnopc_and_hGPRRegClassID) |
- (1u << ARM::hGPR_and_rGPRRegClassID) | (1u << ARM::tcGPRRegClassID) |
- (1u << ARM::tGPR_and_tcGPRRegClassID) | (1u << ARM::GPRspRegClassID) |
- (1u << ARM::hGPR_and_tcGPRRegClassID),
- // Classes 32-63
- 0,
- // Classes 64-96
- 0,
- // FIXME: Some of the entries below this point can be safely removed once
- // this is tablegenerated. It's only needed because of the hardcoded
- // register class limit.
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
+enum PartialMappingIdx {
+ PMI_GPR,
+ PMI_SPR,
+ PMI_DPR,
+ PMI_Min = PMI_GPR,
+};
+
+RegisterBankInfo::PartialMapping PartMappings[]{
+ // GPR Partial Mapping
+ {0, 32, GPRRegBank},
+ // SPR Partial Mapping
+ {0, 32, FPRRegBank},
+ // DPR Partial Mapping
+ {0, 64, FPRRegBank},
};
-RegisterBank GPRRegBank(ARM::GPRRegBankID, "GPRB", 32, ARM::GPRCoverageData);
-RegisterBank *RegBanks[] = {&GPRRegBank};
+#ifndef NDEBUG
+static bool checkPartMapping(const RegisterBankInfo::PartialMapping &PM,
+ unsigned Start, unsigned Length,
+ unsigned RegBankID) {
+ return PM.StartIdx == Start && PM.Length == Length &&
+ PM.RegBank->getID() == RegBankID;
+}
+
+static void checkPartialMappings() {
+ assert(
+ checkPartMapping(PartMappings[PMI_GPR - PMI_Min], 0, 32, GPRRegBankID) &&
+ "Wrong mapping for GPR");
+ assert(
+ checkPartMapping(PartMappings[PMI_SPR - PMI_Min], 0, 32, FPRRegBankID) &&
+ "Wrong mapping for SPR");
+ assert(
+ checkPartMapping(PartMappings[PMI_DPR - PMI_Min], 0, 64, FPRRegBankID) &&
+ "Wrong mapping for DPR");
+}
+#endif
-RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank};
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPR3OpsIdx = 1,
+ SPR3OpsIdx = 4,
+ DPR3OpsIdx = 7,
+};
RegisterBankInfo::ValueMapping ValueMappings[] = {
- {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}};
+ // invalid
+ {nullptr, 0},
+ // 3 ops in GPRs
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ // 3 ops in SPRs
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ // 3 ops in DPRs
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1}};
+
+#ifndef NDEBUG
+static bool checkValueMapping(const RegisterBankInfo::ValueMapping &VM,
+ RegisterBankInfo::PartialMapping *BreakDown) {
+ return VM.NumBreakDowns == 1 && VM.BreakDown == BreakDown;
+}
+
+static void checkValueMappings() {
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 1],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 2],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 1],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 2],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 1],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 2],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+}
+#endif
} // end namespace ARM
} // end namespace llvm
ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
- : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) {
+ : ARMGenRegisterBankInfo() {
static bool AlreadyInit = false;
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
@@ -97,6 +166,11 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
"Subclass not added?");
assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
+
+#ifndef NDEBUG
+ ARM::checkPartialMappings();
+ ARM::checkValueMappings();
+#endif
}
const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
@@ -105,8 +179,16 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
switch (RC.getID()) {
case GPRRegClassID:
+ case GPRnopcRegClassID:
+ case GPRspRegClassID:
case tGPR_and_tcGPRRegClassID:
+ case tGPRRegClassID:
return getRegBank(ARM::GPRRegBankID);
+ case SPR_8RegClassID:
+ case SPRRegClassID:
+ case DPR_8RegClassID:
+ case DPRRegClassID:
+ return getRegBank(ARM::FPRRegBankID);
default:
llvm_unreachable("Unsupported register kind");
}
@@ -128,23 +210,83 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
using namespace TargetOpcode;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
unsigned NumOperands = MI.getNumOperands();
- const ValueMapping *OperandsMapping = &ARM::ValueMappings[0];
+ const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
switch (Opc) {
case G_ADD:
- case G_LOAD:
+ case G_SEXT:
+ case G_ZEXT:
+ case G_GEP:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
- OperandsMapping = &ARM::ValueMappings[0];
+ OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
+ break;
+ case G_LOAD:
+ case G_STORE:
+ OperandsMapping =
+ Ty.getSizeInBits() == 64
+ ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]})
+ : &ARM::ValueMappings[ARM::GPR3OpsIdx];
+ break;
+ case G_FADD:
+ assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) &&
+ "Unsupported size for G_FADD");
+ OperandsMapping = Ty.getSizeInBits() == 64
+ ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::SPR3OpsIdx];
break;
+ case G_CONSTANT:
case G_FRAME_INDEX:
- OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr});
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
+ case G_SEQUENCE: {
+ // We only support G_SEQUENCE for creating a double precision floating point
+ // value out of two GPRs.
+ LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 ||
+ Ty2.getSizeInBits() != 32)
+ return InstructionMapping{};
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+ &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
+ break;
+ }
+ case G_EXTRACT: {
+ // We only support G_EXTRACT for splitting a double precision floating point
+ // value into two GPRs.
+ LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
+ if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 ||
+ MI.getOperand(2).getImm() % 32 != 0)
+ return InstructionMapping{};
+ OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::DPR3OpsIdx],
+ nullptr, nullptr});
+ break;
+ }
default:
return InstructionMapping{};
}
+#ifndef NDEBUG
+ for (unsigned i = 0; i < NumOperands; i++) {
+ for (const auto &Mapping : OperandsMapping[i]) {
+ assert(
+ (Mapping.RegBank->getID() != ARM::FPRRegBankID ||
+ MF.getSubtarget<ARMSubtarget>().hasVFP2()) &&
+ "Trying to use floating point register bank on target without vfp");
+ }
+ }
+#endif
+
return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping,
NumOperands};
}
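The PartMappings/ValueMappings tables in the hunk above encode one PartialMapping per bank/size pair and three consecutive ValueMapping entries (one per operand) for each three-operand instruction flavour; getInstrMapping then picks the GPR, SPR, or DPR triple from the operand size. A standalone sketch of that indexing scheme, using simplified stand-in types rather than the real RegisterBankInfo classes (all names here are illustrative):

// Simplified stand-ins for the tables above; not LLVM code.
#include <cassert>

namespace sketch {

enum RegBankID { GPRBank, FPRBank };

struct PartialMapping {
  unsigned StartIdx; // lowest bit covered by this partial mapping
  unsigned Length;   // number of bits covered
  RegBankID Bank;    // register bank providing those bits
};

struct ValueMapping {
  const PartialMapping *BreakDown; // here always a single partial mapping
  unsigned NumBreakDowns;
};

// Mirrors PartMappings: one entry per bank/size combination.
const PartialMapping PartMappings[] = {
    {0, 32, GPRBank}, // PMI_GPR
    {0, 32, FPRBank}, // PMI_SPR
    {0, 64, FPRBank}, // PMI_DPR
};

// Mirrors ValueMappings: entry 0 is invalid, then three consecutive
// entries, one per operand, for each three-operand instruction flavour.
enum ValueMappingIdx { InvalidIdx = 0, GPR3OpsIdx = 1, SPR3OpsIdx = 4, DPR3OpsIdx = 7 };

const ValueMapping ValueMappings[] = {
    {nullptr, 0},
    {&PartMappings[0], 1}, {&PartMappings[0], 1}, {&PartMappings[0], 1},
    {&PartMappings[1], 1}, {&PartMappings[1], 1}, {&PartMappings[1], 1},
    {&PartMappings[2], 1}, {&PartMappings[2], 1}, {&PartMappings[2], 1},
};

// Same decision getInstrMapping makes: integer ops use the GPR triple,
// floating-point ops use the SPR or DPR triple depending on size.
const ValueMapping *pickOperandsMapping(unsigned SizeInBits, bool IsFP) {
  if (!IsFP)
    return &ValueMappings[GPR3OpsIdx];
  return SizeInBits == 64 ? &ValueMappings[DPR3OpsIdx]
                          : &ValueMappings[SPR3OpsIdx];
}

} // namespace sketch

int main() {
  using namespace sketch;
  const ValueMapping *M = pickOperandsMapping(64, /*IsFP=*/true);
  // All three operands of a 64-bit FADD map to the FPR bank, 64 bits each.
  for (unsigned Op = 0; Op < 3; ++Op)
    assert(M[Op].BreakDown->Bank == FPRBank && M[Op].BreakDown->Length == 64);
  return 0;
}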
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.h b/lib/Target/ARM/ARMRegisterBankInfo.h
index 773920ee57a7..5222c1e6389f 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -16,19 +16,20 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#define GET_REGBANK_DECLARATIONS
+#include "ARMGenRegisterBank.inc"
+
namespace llvm {
class TargetRegisterInfo;
-namespace ARM {
-enum {
- GPRRegBankID = 0, // General purpose registers
- NumRegisterBanks,
+class ARMGenRegisterBankInfo : public RegisterBankInfo {
+#define GET_TARGET_REGBANK_CLASS
+#include "ARMGenRegisterBank.inc"
};
-} // end namespace ARM
/// This class provides the information for the target register banks.
-class ARMRegisterBankInfo final : public RegisterBankInfo {
+class ARMRegisterBankInfo final : public ARMGenRegisterBankInfo {
public:
ARMRegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/lib/Target/ARM/ARMRegisterBanks.td b/lib/Target/ARM/ARMRegisterBanks.td
new file mode 100644
index 000000000000..7cd2d60d36a4
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterBanks.td
@@ -0,0 +1,14 @@
+//=- ARMRegisterBanks.td - Describe the ARM Register Banks --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+def GPRRegBank : RegisterBank<"GPRB", [GPR, GPRwithAPSR]>;
+def FPRRegBank : RegisterBank<"FPRB", [SPR, DPR]>;
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index b7d2d34614df..87eb4c2b9074 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Instruction scheduling annotations for out-of-order CPUs.
+// Instruction scheduling annotations for in-order and out-of-order CPUs.
// These annotations are independent of the itinerary class defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
@@ -54,6 +54,9 @@
// }
// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+//===----------------------------------------------------------------------===//
+// Sched definitions for integer pipeline instructions
+//
// Basic ALU operation.
def WriteALU : SchedWrite;
def ReadALU : SchedRead;
@@ -69,24 +72,65 @@ def WriteCMP : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;
-// Division.
-def WriteDiv : SchedWrite;
+// Multiplies.
+def WriteMUL16 : SchedWrite; // 16-bit multiply.
+def WriteMUL32 : SchedWrite; // 32-bit multiply.
+def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg.
+def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg.
+def ReadMUL : SchedRead;
+
+// Multiply-accumulates.
+def WriteMAC16 : SchedWrite; // 16-bit mac.
+def WriteMAC32 : SchedWrite; // 32-bit mac.
+def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg.
+def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg.
+def ReadMAC : SchedRead;
+
+// Divisions.
+def WriteDIV : SchedWrite;
-// Loads.
+// Loads/Stores.
def WriteLd : SchedWrite;
def WritePreLd : SchedWrite;
+def WriteST : SchedWrite;
// Branches.
def WriteBr : SchedWrite;
def WriteBrL : SchedWrite;
def WriteBrTbl : SchedWrite;
-// Fixpoint conversions.
-def WriteCvtFP : SchedWrite;
-
// Noop.
def WriteNoop : SchedWrite;
+//===----------------------------------------------------------------------===//
+// Sched definitions for floating-point and NEON instructions
+//
+// Floating point conversions
+def WriteFPCVT : SchedWrite;
+def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa
+
+// ALU operations (32/64-bit)
+def WriteFPALU32 : SchedWrite;
+def WriteFPALU64 : SchedWrite;
+
+// Multiplication
+def WriteFPMUL32 : SchedWrite;
+def WriteFPMUL64 : SchedWrite;
+def ReadFPMUL : SchedRead; // multiplier read
+def ReadFPMAC : SchedRead; // accumulator read
+
+// Multiply-accumulate
+def WriteFPMAC32 : SchedWrite;
+def WriteFPMAC64 : SchedWrite;
+
+// Division
+def WriteFPDIV32 : SchedWrite;
+def WriteFPDIV64 : SchedWrite;
+
+// Square-root
+def WriteFPSQRT32 : SchedWrite;
+def WriteFPSQRT64 : SchedWrite;
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 519e595bd184..8fb8a2a3b6d2 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1944,6 +1944,16 @@ def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
let NumMicroOps = 0; }
+def : SchedAlias<WriteMUL16, A9WriteM16>;
+def : SchedAlias<WriteMUL32, A9WriteM>;
+def : SchedAlias<WriteMUL64Lo, A9WriteM>;
+def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
+def : SchedAlias<WriteMAC16, A9WriteM16>;
+def : SchedAlias<WriteMAC32, A9WriteM>;
+def : SchedAlias<WriteMAC64Lo, A9WriteM>;
+def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
// Floating-point
// Only one FP or AGU instruction may issue per cycle. We model this
@@ -1953,6 +1963,7 @@ def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+
def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
@@ -1992,6 +2003,7 @@ def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
// Load Integer.
def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
+def : SchedAlias<WriteLd, A9WriteL>;
// Load the upper 32-bits using the same micro-op.
def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
let NumMicroOps = 0; }
@@ -2471,6 +2483,34 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU, A9ReadALU>;
def : SchedAlias<ReadALUsr, A9ReadALU>;
+def : SchedAlias<WriteST, A9WriteS>;
+
+// ===---------------------------------------------------------------------===//
+// Floating-point. Map target-defined SchedReadWrites to processor-specific ones
+//
+def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def : SchedAlias<WriteFPMOV, A9WriteFMov>;
+
+def : SchedAlias<WriteFPALU32, A9WriteF>;
+def : SchedAlias<WriteFPALU64, A9WriteF>;
+
+def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
+def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
+
+def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
+def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
+
+def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
+def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
+def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
+def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
+
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 0>;
+
+// ===---------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to lists of SchedReadWrite types.
+//
def : InstRW< [WriteALU],
(instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
"BICrr")>;
@@ -2518,12 +2558,11 @@ def : InstRW<[A9WriteLb],
"LDRH", "LDRSH", "LDRSB")>;
def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
-def : WriteRes<WriteDiv, []> { let Latency = 0; }
+def : WriteRes<WriteDIV, []> { let Latency = 0; }
def : WriteRes<WriteBr, [A9UnitB]>;
def : WriteRes<WriteBrL, [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
def : WriteRes<WritePreLd, []>;
-def : SchedAlias<WriteCvtFP, A9WriteF>;
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td
index 1b40742a093b..537e5da9669f 100644
--- a/lib/Target/ARM/ARMScheduleR52.td
+++ b/lib/Target/ARM/ARMScheduleR52.td
@@ -70,15 +70,13 @@ def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
+// Multiply - aliased to sub-target specific later
+
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
-def : WriteRes<WriteDiv, [R52UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8]; // not pipelined
+def : WriteRes<WriteDIV, [R52UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8]; // non-pipelined
}
-// Loads
-def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; }
-def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; }
-
// Branches - LR written in Late EX2
def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
@@ -86,11 +84,44 @@ def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
// Misc
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
+// Integer pipeline by-passes
def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
+
+// Floating-point. Map target-defined SchedReadWrites to subtarget-specific ones
+def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
+
+def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
+ let Latency = 6;
+}
+
+def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
+ let Latency = 11; // as it is internally two insns (MUL then ADD)
+}
+def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
+ R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 11;
+}
+
+def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
+ let Latency = 7; // FP div takes fixed #cycles
+ let ResourceCycles = [7]; // is not pipelined
+}
+
+def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+
+def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
+def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
+
+def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
+def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.
@@ -106,6 +137,9 @@ def : ReadAdvance<R52Read_F2, 2>;
// Cortex-R52 specific SchedWrites for use with InstRW
def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
+def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
+ let Latency = 4; let NumMicroOps = 0;
+}
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
let Latency = 8; let ResourceCycles = [8]; // not pipelined
}
@@ -120,6 +154,19 @@ def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
+// Alias generics to sub-target specific
+def : SchedAlias<WriteMUL16, R52WriteMAC>;
+def : SchedAlias<WriteMUL32, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
+def : SchedAlias<WriteMAC16, R52WriteMAC>;
+def : SchedAlias<WriteMAC32, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
+def : SchedAlias<WritePreLd, R52WriteLd>;
+def : SchedAlias<WriteLd, R52WriteLd>;
+def : SchedAlias<WriteST, R52WriteST>;
+
def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
let Latency = 4;
@@ -147,19 +194,17 @@ def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
-def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 7; // FP div takes fixed #cycles
- let ResourceCycles = [7]; // is not pipelined
- }
-def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 17;
- let ResourceCycles = [17];
-}
-
-
//===----------------------------------------------------------------------===//
-// Subtarget-specific - map operands to SchedReadWrites
+// Floating-point. Map target-defined SchedReadWrites to processor-specific ones
+//
+def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
+def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
+//===----------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to lists of SchedReadWrite types.
+//
def : InstRW<[WriteALU], (instrs COPY)>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
@@ -235,7 +280,7 @@ def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
"t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
- (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+ (instregex "t2SDIV", "t2UDIV")>;
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform
@@ -294,15 +339,6 @@ def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
-
-//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
-//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
-//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
-
-
// Integer Load, Multiple.
foreach Lat = 3-25 in {
def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
@@ -492,12 +528,6 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VAC
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
-def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
-def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
-
-def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
- (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
-
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
@@ -687,16 +717,19 @@ def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
let ResourceCycles = [2];
+ let SingleIssue = 1;
}
def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [3];
+ let SingleIssue = 1;
}
def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
let ResourceCycles = [4];
+ let SingleIssue = 1;
}
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 5;
@@ -777,9 +810,8 @@ def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHS
def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
-def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
-def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index ea2bf4b578f0..dc041c6c6006 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -133,6 +133,8 @@ let SchedModel = SwiftModel in {
def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
def : ReadAdvance<ReadALU, 0>;
def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+ def : SchedAlias<WriteLd, SwiftWriteP2ThreeCycle>;
+ def : SchedAlias<WriteST, SwiftWriteP2>;
def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
@@ -166,10 +168,10 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP01OneCycle2x_load],
(instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
- def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
+ def SwiftWriteP0TwoCycleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCycleTwoUops ]>,
SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]>
]>;
@@ -282,6 +284,18 @@ let SchedModel = SwiftModel in {
let ResourceCycles = [2, 3];
}
+ // Aliasing sub-target specific WriteRes to generic ones
+ def : SchedAlias<WriteMUL16, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteMUL32, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteMUL64Lo, SwiftP0P0P01FiveCycle>;
+ def : SchedAlias<WriteMUL64Hi, SwiftWrite5Cycle>;
+ def : SchedAlias<WriteMAC16, SwiftPredP0P01FourFiveCycle>;
+ def : SchedAlias<WriteMAC32, SwiftPredP0P01FourFiveCycle>;
+ def : SchedAlias<WriteMAC64Lo, SwiftWrite5Cycle>;
+ def : SchedAlias<WriteMAC64Hi, Swift2P03P01FiveCycle>;
+ def : ReadAdvance<ReadMUL, 0>;
+ def : SchedAlias<ReadMAC, SwiftReadAdvanceFourCyclesPred>;
+
// 4.2.15 Integer Multiply Accumulate, Long
// 4.2.16 Integer Multiply Accumulate, Dual
// 4.2.17 Integer Multiply Accumulate Accumulate, Long
@@ -300,7 +314,7 @@ let SchedModel = SwiftModel in {
let ResourceCycles = [1, 14];
}
// 4.2.18 Integer Divide
- def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
+ def : WriteRes<WriteDIV, [SwiftUnitDiv]>; // Workaround.
def : InstRW <[SwiftDiv],
(instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
@@ -310,7 +324,7 @@ let SchedModel = SwiftModel in {
let Latency = 3;
let NumMicroOps = 2;
}
- def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ def SwiftWriteP2P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 4;
let NumMicroOps = 2;
}
@@ -343,7 +357,7 @@ let SchedModel = SwiftModel in {
"tLDR(r|i|spi|pci|pciASM)")>;
def : InstRW<[SwiftWriteP2ThreeCycle],
(instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
- def : InstRW<[SwiftWriteP2P01FourCyle],
+ def : InstRW<[SwiftWriteP2P01FourCycle],
(instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
"t2LDRpci_pic", "tLDRS(B|H)")>;
def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
@@ -597,8 +611,6 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
"VMULL", "VQDMULL")>;
- def : InstRW<[SwiftWriteP1SixCycle],
- (instregex "VMULD", "VNMULD")>;
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
"VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
@@ -607,8 +619,6 @@ let SchedModel = SwiftModel in {
// 4.2.36 Advanced SIMD and VFP, Convert
def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
- // Fixpoint conversions.
- def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
// 4.2.37 Advanced SIMD and VFP, Move
def : InstRW<[SwiftWriteP0TwoCycle],
@@ -1036,6 +1046,30 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
+ // ===---------------------------------------------------------------------===//
+ // Floating-point. Map target-defined SchedReadWrites to processor-specific ones
+ //
+ def : SchedAlias<WriteFPCVT, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMOV, SwiftWriteP2ThreeCycle>;
+
+ def : SchedAlias<WriteFPALU32, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteFPALU64, SwiftWriteP0SixCycle>;
+
+ def : SchedAlias<WriteFPMUL32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMUL64, SwiftWriteP1SixCycle>;
+
+ def : SchedAlias<WriteFPMAC32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMAC64, SwiftWriteP1FourCycle>;
+
+ def : SchedAlias<WriteFPDIV32, SwiftDiv17>;
+ def : SchedAlias<WriteFPSQRT32, SwiftDiv17>;
+
+ def : SchedAlias<WriteFPDIV64, SwiftDiv32>;
+ def : SchedAlias<WriteFPSQRT64, SwiftDiv32>;
+
+ def : ReadAdvance<ReadFPMUL, 0>;
+ def : ReadAdvance<ReadFPMAC, 0>;
+
// Not specified.
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 3b99762f7157..33dcf9b8fef0 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -95,7 +95,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.isSExt = false;
+ Entry.IsSExt = false;
Args.push_back(Entry);
} else {
Entry.Node = Src;
@@ -114,11 +114,11 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(
- TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
- TLI->getPointerTy(DAG.getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+ TLI->getPointerTy(DAG.getDataLayout())),
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -198,17 +198,18 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
return Chain;
// Issue loads / stores for the trailing (1 - 3) bytes.
+ auto getRemainingValueType = [](unsigned BytesLeft) {
+ return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
+ };
+ auto getRemainingSize = [](unsigned BytesLeft) {
+ return (BytesLeft >= 2) ? 2 : 1;
+ };
+
unsigned BytesLeftSave = BytesLeft;
i = 0;
while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
+ VT = getRemainingValueType(BytesLeft);
+ VTSize = getRemainingSize(BytesLeft);
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, dl, MVT::i32)),
@@ -224,14 +225,8 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
i = 0;
BytesLeft = BytesLeftSave;
while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
+ VT = getRemainingValueType(BytesLeft);
+ VTSize = getRemainingSize(BytesLeft);
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, dl, MVT::i32)),
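The memcpy hunk above hoists the trailing-byte type/size choice into two small lambdas shared by the load loop and the store loop: while at least two bytes remain it emits an i16 access, otherwise an i8. A standalone sketch of the same trailing-bytes pattern, with a plain memcpy standing in for the load/store pairs (names are illustrative, not LLVM APIs):

// Copy the trailing 1-3 bytes in halfword-then-byte chunks.
#include <cassert>
#include <cstring>

static void copyTrailing(char *Dst, const char *Src, unsigned BytesLeft) {
  auto chunkSize = [](unsigned Left) { return Left >= 2 ? 2u : 1u; };
  while (BytesLeft) {
    unsigned Size = chunkSize(BytesLeft); // mirrors getRemainingSize
    std::memcpy(Dst, Src, Size);          // stands in for the i16/i8 load+store
    Dst += Size;
    Src += Size;
    BytesLeft -= Size;
  }
}

int main() {
  const char In[3] = {'a', 'b', 'c'};
  char Out[3] = {};
  copyTrailing(Out, In, 3); // one 2-byte chunk, then one 1-byte chunk
  assert(std::memcmp(In, Out, 3) == 0);
  return 0;
}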
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e2df0bddd0d1..b8a708a20a95 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,25 +13,27 @@
#include "ARMSubtarget.h"
#include "ARMFrameLowering.h"
-#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Attributes.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetParser.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -104,7 +106,7 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM, *this), GISel() {}
+ TLInfo(TM, *this) {}
const CallLowering *ARMSubtarget::getCallLowering() const {
assert(GISel && "Access to GlobalISel APIs not set");
@@ -148,11 +150,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isTargetDarwin()) {
StringRef ArchName = TargetTriple.getArchName();
- unsigned ArchKind = llvm::ARM::parseArch(ArchName);
- if (ArchKind == llvm::ARM::AK_ARMV7S)
+ unsigned ArchKind = ARM::parseArch(ArchName);
+ if (ArchKind == ARM::AK_ARMV7S)
// Default to the Swift CPU when targeting armv7s/thumbv7s.
CPUString = "swift";
- else if (ArchKind == llvm::ARM::AK_ARMV7K)
+ else if (ArchKind == ARM::AK_ARMV7K)
// Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
// ARMv7k does not use SjLj exception handling.
CPUString = "cortex-a7";
@@ -200,12 +202,12 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// support in the assembler and linker to be used. This would need to be
// fixed to fully support tail calls in Thumb1.
//
- // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
- // LR. This means if we need to reload LR, it takes an extra instructions,
- // which outweighs the value of the tail call; but here we don't know yet
- // whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
- // emitEpilogue if LR is used.
+ // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M
+ // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This
+ // means if we need to reload LR, it takes extra instructions, which outweighs
+ // the value of the tail call; but here we don't know yet whether LR is going
+ // to be used. We generate the tail call here and turn it back into CALL/RET
+ // in emitEpilogue if LR is used.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
@@ -274,6 +276,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexM3:
case ExynosM1:
case CortexR52:
+ case Kryo:
break;
case Krait:
PreISelOperandLatencyAdjustment = 1;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8c8218d0f432..40993fc0aa8a 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,47 +14,93 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
-
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
-#include "ARMInstrInfo.h"
#include "ARMSelectionDAGInfo.h"
-#include "ARMSubtarget.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "Thumb1FrameLowering.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb2InstrInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <memory>
#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "ARMGenSubtargetInfo.inc"
namespace llvm {
+
+class ARMBaseTargetMachine;
class GlobalValue;
class StringRef;
-class TargetOptions;
-class ARMBaseTargetMachine;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3,
- CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73,
- Krait, Swift, ExynosM1
+ Others,
+
+ CortexA12,
+ CortexA15,
+ CortexA17,
+ CortexA32,
+ CortexA35,
+ CortexA5,
+ CortexA53,
+ CortexA57,
+ CortexA7,
+ CortexA72,
+ CortexA73,
+ CortexA8,
+ CortexA9,
+ CortexM3,
+ CortexR4,
+ CortexR4F,
+ CortexR5,
+ CortexR52,
+ CortexR7,
+ ExynosM1,
+ Krait,
+ Kryo,
+ Swift
};
enum ARMProcClassEnum {
- None, AClass, RClass, MClass
+ None,
+
+ AClass,
+ MClass,
+ RClass
};
enum ARMArchEnum {
- ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
- ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
- ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline,
+ ARMv2,
+ ARMv2a,
+ ARMv3,
+ ARMv3m,
+ ARMv4,
+ ARMv4t,
+ ARMv5,
+ ARMv5t,
+ ARMv5te,
+ ARMv5tej,
+ ARMv6,
+ ARMv6k,
+ ARMv6kz,
+ ARMv6m,
+ ARMv6sm,
+ ARMv6t2,
+ ARMv7a,
+ ARMv7em,
+ ARMv7m,
+ ARMv7r,
+ ARMv7ve,
+ ARMv81a,
+ ARMv82a,
+ ARMv8a,
+ ARMv8mBaseline,
+ ARMv8mMainline,
ARMv8r
};
@@ -168,10 +214,6 @@ protected:
/// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
bool HasHardwareDivideInARM = false;
- /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
- /// instructions.
- bool HasT2ExtractPack = false;
-
/// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
/// instructions.
bool HasDataBarrier = false;
@@ -310,6 +352,10 @@ protected:
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
+ /// Implicitly convert an instruction to a different one if its immediates
+ /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
+ bool NegativeImmediates = true;
+
/// stackAlignment - The minimum alignment known to hold for the stack frame on
/// entry to the function, and which must be maintained by every function.
unsigned stackAlignment = 4;
@@ -362,6 +408,7 @@ public:
unsigned getMaxInlineSizeThreshold() const {
return 64;
}
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -373,15 +420,19 @@ public:
const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
+
const ARMBaseInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
}
+
const ARMTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+
const ARMFrameLowering *getFrameLowering() const override {
return FrameLowering.get();
}
+
const ARMBaseRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
@@ -451,19 +502,21 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
bool hasVirtualization() const { return HasVirtualization; }
+
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
}
bool hasDivide() const { return HasHardwareDivide; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
- bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool hasV7Clrex() const { return HasV7Clrex; }
bool hasAcquireRelease() const { return HasAcquireRelease; }
+
bool hasAnyDataBarrier() const {
return HasDataBarrier || (hasV6Ops() && !isThumb());
}
+
bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
@@ -561,9 +614,10 @@ public:
TargetTriple.getEnvironment() == Triple::EABIHF ||
isTargetWindows() || isAAPCS16_ABI();
}
+
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
- virtual bool isXRaySupported() const override;
+ bool isXRaySupported() const override;
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
@@ -588,6 +642,7 @@ public:
bool useR7AsFramePointer() const {
return isTargetDarwin() || (!isTargetWindows() && isThumb());
}
+
/// Returns true if the frame setup is split into two separate pushes (first
/// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
/// to lr. This is always required on Thumb1-only targets, as the push and
@@ -656,6 +711,7 @@ public:
/// True if fast-isel is used.
bool useFastISel() const;
};
-} // End llvm namespace
-#endif // ARMSUBTARGET_H
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 70c9567d99f8..b8dadb331ecf 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -10,30 +10,50 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetMachine.h"
#include "ARM.h"
#include "ARMCallLowering.h"
-#include "ARMFrameLowering.h"
#include "ARMInstructionSelector.h"
#include "ARMLegalizerInfo.h"
#include "ARMRegisterBankInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool>
@@ -57,6 +77,10 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
+namespace llvm {
+ void initializeARMExecutionDepsFixPass(PassRegistry&);
+}
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
@@ -68,14 +92,16 @@ extern "C" void LLVMInitializeARMTarget() {
initializeGlobalISel(Registry);
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
+ initializeARMConstantIslandsPass(Registry);
+ initializeARMExecutionDepsFixPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
if (TT.isOSWindows())
- return make_unique<TargetLoweringObjectFileCOFF>();
- return make_unique<ARMElfTargetObjectFile>();
+ return llvm::make_unique<TargetLoweringObjectFileCOFF>();
+ return llvm::make_unique<ARMElfTargetObjectFile>();
}
static ARMBaseTargetMachine::ARMABI
@@ -94,13 +120,13 @@ computeTargetABI(const Triple &TT, StringRef CPU,
ARMBaseTargetMachine::ARMABI TargetABI =
ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
- unsigned ArchKind = llvm::ARM::parseCPUArch(CPU);
- StringRef ArchName = llvm::ARM::getArchName(ArchKind);
+ unsigned ArchKind = ARM::parseCPUArch(CPU);
+ StringRef ArchName = ARM::getArchName(ArchKind);
// FIXME: This is duplicated code from the front end and should be unified.
if (TT.isOSBinFormatMachO()) {
- if (TT.getEnvironment() == llvm::Triple::EABI ||
- (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
- llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) {
+ if (TT.getEnvironment() == Triple::EABI ||
+ (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
+ ARM::parseArchProfile(ArchName) == ARM::PK_M) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
} else if (TT.isWatchABI()) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
@@ -113,16 +139,16 @@ computeTargetABI(const Triple &TT, StringRef CPU,
} else {
// Select the default based on the platform.
switch (TT.getEnvironment()) {
- case llvm::Triple::Android:
- case llvm::Triple::GNUEABI:
- case llvm::Triple::GNUEABIHF:
- case llvm::Triple::MuslEABI:
- case llvm::Triple::MuslEABIHF:
- case llvm::Triple::EABIHF:
- case llvm::Triple::EABI:
+ case Triple::Android:
+ case Triple::GNUEABI:
+ case Triple::GNUEABIHF:
+ case Triple::MuslEABI:
+ case Triple::MuslEABIHF:
+ case Triple::EABIHF:
+ case Triple::EABI:
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
break;
- case llvm::Triple::GNU:
+ case Triple::GNU:
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
break;
default:
@@ -141,7 +167,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options,
bool isLittle) {
auto ABI = computeTargetABI(TT, CPU, Options);
- std::string Ret = "";
+ std::string Ret;
if (isLittle)
// Little endian.
@@ -238,29 +264,35 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
}
}
-ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct ARMGISelActualAccessor : public GISelAccessor {
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+
const InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
+
const LegalizerInfo *getLegalizerInfo() const override {
return Legalizer.get();
}
+
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
const ARMSubtarget *
@@ -300,7 +332,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
#else
ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new ARMLegalizerInfo());
+ GISel->Legalizer.reset(new ARMLegalizerInfo(*I));
auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
@@ -390,6 +422,7 @@ ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
: ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
+
/// ARM Code Generator Pass Configuration Options.
class ARMPassConfig : public TargetPassConfig {
public:
@@ -413,7 +446,21 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+class ARMExecutionDepsFix : public ExecutionDepsFix {
+public:
+ static char ID;
+ ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {}
+ StringRef getPassName() const override {
+ return "ARM Execution Dependency Fix";
+ }
+};
+char ARMExecutionDepsFix::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix",
+ "ARM Execution Dependency Fix", false, false)
TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
return new ARMPassConfig(this, PM);
@@ -508,7 +555,7 @@ void ARMPassConfig::addPreSched2() {
if (EnableARMLoadStoreOpt)
addPass(createARMLoadStoreOptimizationPass());
- addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(new ARMExecutionDepsFix());
}
// Expand some pseudo instructions into multiple instructions to allow
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c6b70b953162..f0ca9427d9fb 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,10 +14,14 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
-#include "ARMInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
@@ -32,7 +36,7 @@ public:
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- ARMSubtarget Subtarget;
+ ARMSubtarget Subtarget;
bool isLittle;
mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
@@ -62,7 +66,8 @@ public:
///
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
- public:
+
+public:
ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Optional<Reloc::Model> RM, CodeModel::Model CM,
@@ -73,6 +78,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
///
class ARMLETargetMachine : public ARMTargetMachine {
void anchor() override;
+
public:
ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -84,6 +90,7 @@ public:
///
class ARMBETargetMachine : public ARMTargetMachine {
void anchor() override;
+
public:
ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -97,6 +104,7 @@ public:
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
+
public:
ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -108,6 +116,7 @@ public:
///
class ThumbLETargetMachine : public ThumbTargetMachine {
void anchor() override;
+
public:
ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -119,6 +128,7 @@ public:
///
class ThumbBETargetMachine : public ThumbTargetMachine {
void anchor() override;
+
public:
ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -128,4 +138,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 625c4280e1a6..94f9e8dfebbf 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -7,17 +7,20 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/IR/Mangler.h"
+#include "ARMTargetObjectFile.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
using namespace llvm;
using namespace dwarf;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 24e755ddac27..dbb8128269dc 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -11,19 +11,19 @@
#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCExpr.h"
namespace llvm {
-class MCContext;
-class TargetMachine;
-
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
mutable bool genExecuteOnly = false;
+
protected:
- const MCSection *AttributesSection;
+ const MCSection *AttributesSection = nullptr;
+
public:
ARMElfTargetObjectFile()
- : TargetLoweringObjectFileELF(), AttributesSection(nullptr) {
+ : TargetLoweringObjectFileELF() {
PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31;
}
@@ -47,4 +47,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2b6b36bc3e68..8eb9dbf5f9de 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -92,7 +92,8 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
-int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -310,7 +311,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
-int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a vector select gets lowered to vbsl.
@@ -335,7 +337,7 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return LT.first;
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -504,7 +506,7 @@ int ARMTTIImpl::getArithmeticInstrCost(
}
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
@@ -529,12 +531,14 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
- Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
- if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
- return Factor;
+ // Accesses having vector types that are a multiple of 128 bits can be
+ // matched to more than one vldN/vstN instruction.
+ if (NumElts % Factor == 0 &&
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 3c83cd92a61a..7de0543dfa5e 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,10 +33,6 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
@@ -98,9 +94,11 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -118,7 +116,7 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
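The interleaved-access change in ARMTargetTransformInfo.cpp above stops hard-coding the 64/128-bit sub-vector limit and instead scales the cost by however many vldN/vstN operations the sub-vector needs. A minimal sketch of that costing idea, using hypothetical free functions in place of the real TargetLowering hooks (isLegalInterleavedAccessType / getNumInterleavedAccesses):

    #include <cstdint>

    // Assumed model: a 64-bit sub-vector maps onto a single D-register access
    // and any larger legal sub-vector is split into 128-bit chunks, each of
    // which needs its own vldN/vstN.
    static unsigned numInterleavedAccesses(uint64_t SubVecBits) {
      return SubVecBits <= 64 ? 1 : static_cast<unsigned>(SubVecBits / 128);
    }

    // Cost returned for a legal interleaved group: the interleave factor
    // scaled by the number of underlying vldN/vstN instructions.
    static unsigned interleavedAccessCost(unsigned Factor, uint64_t SubVecBits) {
      return Factor * numInterleavedAccesses(SubVecBits);
    }

For example, a factor-2 group whose sub-vector is 256 bits wide costs 2 * 2 = 4 under this model.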
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c243a2d35979..f421d3ac1693 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -915,40 +915,37 @@ public:
int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
return Val != -1;
}
- bool isFBits16() const {
+
+ template<int64_t N, int64_t M>
+ bool isImmediate() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return Value >= 0 && Value <= 16;
+ return Value >= N && Value <= M;
}
- bool isFBits32() const {
+ template<int64_t N, int64_t M>
+ bool isImmediateS4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return Value >= 1 && Value <= 32;
+ return ((Value & 3) == 0) && Value >= N && Value <= M;
+ }
+ bool isFBits16() const {
+ return isImmediate<0, 17>();
+ }
+ bool isFBits32() const {
+ return isImmediate<1, 33>();
}
bool isImm8s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
+ return isImmediateS4<-1020, 1020>();
}
bool isImm0_1020s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
+ return isImmediateS4<0, 1020>();
}
bool isImm0_508s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
+ return isImmediateS4<0, 508>();
}
bool isImm0_508s4Neg() const {
if (!isImm()) return false;
@@ -958,27 +955,6 @@ public:
// explicitly exclude zero. We want that to use the normal 0_508 version.
return ((Value & 3) == 0) && Value > 0 && Value <= 508;
}
- bool isImm0_239() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 240;
- }
- bool isImm0_255() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 256;
- }
- bool isImm0_4095() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 4096;
- }
bool isImm0_4095Neg() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -986,145 +962,17 @@ public:
int64_t Value = -CE->getValue();
return Value > 0 && Value < 4096;
}
- bool isImm0_1() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 2;
- }
- bool isImm0_3() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 4;
- }
bool isImm0_7() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 8;
- }
- bool isImm0_15() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 16;
- }
- bool isImm0_31() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 32;
- }
- bool isImm0_63() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 64;
- }
- bool isImm8() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 8;
- }
- bool isImm16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 16;
- }
- bool isImm32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 32;
- }
- bool isShrImm8() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 8;
- }
- bool isShrImm16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 16;
- }
- bool isShrImm32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 32;
- }
- bool isShrImm64() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 64;
- }
- bool isImm1_7() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 8;
- }
- bool isImm1_15() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 16;
- }
- bool isImm1_31() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 32;
+ return isImmediate<0, 7>();
}
bool isImm1_16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 17;
+ return isImmediate<1, 16>();
}
bool isImm1_32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 33;
- }
- bool isImm0_32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 33;
+ return isImmediate<1, 32>();
}
- bool isImm0_65535() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 65536;
+ bool isImm8_255() const {
+ return isImmediate<8, 255>();
}
bool isImm256_65535Expr() const {
if (!isImm()) return false;
@@ -1145,32 +993,16 @@ public:
return Value >= 0 && Value < 65536;
}
bool isImm24bit() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value <= 0xffffff;
+ return isImmediate<0, 0xffffff + 1>();
}
bool isImmThumbSR() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 33;
+ return isImmediate<1, 33>();
}
bool isPKHLSLImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 32;
+ return isImmediate<0, 32>();
}
bool isPKHASRImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 32;
+ return isImmediate<0, 33>();
}
bool isAdrLabel() const {
// If we have an immediate that's not a constant, treat it as a label
@@ -1245,6 +1077,20 @@ public:
return ARM_AM::getSOImmVal(Value) == -1 &&
ARM_AM::getSOImmVal(-Value) != -1;
}
+ bool isThumbModImmNeg1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int32_t Value = -(int32_t)CE->getValue();
+ return 0 < Value && Value < 8;
+ }
+ bool isThumbModImmNeg8_255() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int32_t Value = -(int32_t)CE->getValue();
+ return 7 < Value && Value < 256;
+ }
bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; }
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
@@ -2035,6 +1881,20 @@ public:
Inst.addOperand(MCOperand::createImm(Enc));
}
+ void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Val = -CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Val = -CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
void addBitfieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// Munge the lsb/width into a bitfield mask.
@@ -2141,7 +2001,7 @@ public:
// The operand is actually a t2_so_imm, but we have its bitwise
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::createImm(~CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue()));
}
void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
@@ -2149,7 +2009,7 @@ public:
// The operand is actually a t2_so_imm, but we have its
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::createImm(-CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue()));
}
void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
@@ -4330,7 +4190,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
// If some specific flag is already set, it means that some letter is
// present more than once, this is not acceptable.
- if (FlagsVal == ~0U || (FlagsVal & Flag))
+ if (Flag == ~0U || (FlagsVal & Flag))
return MatchOperand_NoMatch;
FlagsVal |= Flag;
}
@@ -5484,7 +5344,8 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
enum {
COFF = (1 << MCObjectFileInfo::IsCOFF),
ELF = (1 << MCObjectFileInfo::IsELF),
- MACHO = (1 << MCObjectFileInfo::IsMachO)
+ MACHO = (1 << MCObjectFileInfo::IsMachO),
+ WASM = (1 << MCObjectFileInfo::IsWasm),
};
static const struct PrefixEntry {
const char *Spelling;
@@ -5518,6 +5379,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
case MCObjectFileInfo::IsCOFF:
CurrentFormat = COFF;
break;
+ case MCObjectFileInfo::IsWasm:
+ CurrentFormat = WASM;
+ break;
}
if (~Prefix->SupportedFormats & CurrentFormat) {
@@ -6301,10 +6165,6 @@ bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst,
else if (ListContainsPC && ListContainsLR)
return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
"PC and LR may not be in the register list simultaneously");
- else if (inITBlock() && !lastInITBlock() && ListContainsPC)
- return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
- "instruction must be outside of IT block or the last "
- "instruction in an IT block");
return false;
}
@@ -6366,6 +6226,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Warning(Loc, "predicated instructions should be in IT block");
}
+ // PC-setting instructions in an IT block, but not the last instruction of
+ // the block, are UNPREDICTABLE.
+ if (inExplicitITBlock() && !lastInITBlock() && isITBlockTerminator(Inst)) {
+ return Error(Loc, "instruction must be outside of IT block or the last instruction in an IT block");
+ }
+
const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
case ARM::LDRD:
@@ -6676,6 +6542,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
break;
}
case ARM::MOVi16:
+ case ARM::MOVTi16:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
{
@@ -8232,7 +8099,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
case ARM::t2LSRri:
case ARM::t2ASRri: {
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
!(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
@@ -8307,23 +8174,38 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
- switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
- default: llvm_unreachable("unexpected opcode!");
- case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
- case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
- case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
- case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
- case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
- }
+ unsigned Shift = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ bool isMov = false;
+ // MOV rd, rm, LSL #0 is actually a MOV instruction
+ if (Shift == ARM_AM::lsl && Amount == 0) {
+ isMov = true;
+ // The 16-bit encoding of MOV rd, rm, LSL #N is explicitly encoding T2 of
+ // MOV (register) in the ARMv8-A and ARMv8-M manuals, and immediate 0 is
+ // unpredictable in an IT block so the 32-bit encoding T3 has to be used
+ // instead.
+ if (inITBlock()) {
+ isNarrow = false;
+ }
+ newOpc = isNarrow ? ARM::tMOVSr : ARM::t2MOVr;
+ } else {
+ switch(Shift) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ }
if (Amount == 32) Amount = 0;
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
- if (isNarrow)
+ if (isNarrow && !isMov)
TmpInst.addOperand(MCOperand::createReg(
Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
- if (newOpc != ARM::t2RRX)
+ if (newOpc != ARM::t2RRX && !isMov)
TmpInst.addOperand(MCOperand::createImm(Amount));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
@@ -8918,6 +8800,9 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
inITBlock())
return Match_RequiresNotITBlock;
+ // LSL with zero immediate is not allowed in an IT block
+ if (Opc == ARM::tLSLri && Inst.getOperand(3).getImm() == 0 && inITBlock())
+ return Match_RequiresNotITBlock;
} else if (isThumbOne()) {
// Some high-register supporting Thumb1 encodings only allow both registers
// to be from r0-r7 when in Thumb2.
@@ -8932,6 +8817,22 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_RequiresV6;
}
+ // Before ARMv8 the rules for when SP is allowed in t2MOVr are more complex
+ // than the loop below can handle, so it uses the GPRnopc register class and
+ // we do SP handling here.
+ if (Opc == ARM::t2MOVr && !hasV8Ops())
+ {
+ // SP as both source and destination is not allowed
+ if (Inst.getOperand(0).getReg() == ARM::SP &&
+ Inst.getOperand(1).getReg() == ARM::SP)
+ return Match_RequiresV8;
+ // When flags-setting, SP as either source or destination is not allowed
+ if (Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (Inst.getOperand(0).getReg() == ARM::SP ||
+ Inst.getOperand(1).getReg() == ARM::SP))
+ return Match_RequiresV8;
+ }
+
for (unsigned I = 0; I < MCID.NumOperands; ++I)
if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
// rGPRRegClass excludes PC, and also excluded SP before ARMv8
@@ -8945,7 +8846,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
namespace llvm {
-template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
+template <> inline bool IsCPSRDead<MCInst>(const MCInst *Instr) {
return true; // In an assembly source, no need to second-guess
}
}
@@ -8975,6 +8876,7 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
// operands. We only care about Thumb instructions here, as ARM instructions
// obviously can't be in an IT block.
switch (Inst.getOpcode()) {
+ case ARM::tLDMIA:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB:
@@ -9088,6 +8990,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
PendConditionalInstruction, Out);
+ SMLoc ErrorLoc;
+ if (ErrorInfo < Operands.size()) {
+ ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+
switch (MatchResult) {
case Match_Success:
// Context sensitive operand constraints aren't handled by the matcher,
@@ -9177,16 +9086,52 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv8 or later");
case Match_RequiresFlagSetting:
return Error(IDLoc, "no flag-preserving variant of this instruction available");
- case Match_ImmRange0_15: {
- SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ case Match_ImmRange0_1:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,1]");
+ case Match_ImmRange0_3:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,3]");
+ case Match_ImmRange0_7:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,7]");
+ case Match_ImmRange0_15:
return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
- }
- case Match_ImmRange0_239: {
- SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ case Match_ImmRange0_31:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,31]");
+ case Match_ImmRange0_32:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,32]");
+ case Match_ImmRange0_63:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,63]");
+ case Match_ImmRange0_239:
return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
- }
+ case Match_ImmRange0_255:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,255]");
+ case Match_ImmRange0_4095:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,4095]");
+ case Match_ImmRange0_65535:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,65535]");
+ case Match_ImmRange1_7:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,7]");
+ case Match_ImmRange1_8:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,8]");
+ case Match_ImmRange1_15:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,15]");
+ case Match_ImmRange1_16:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,16]");
+ case Match_ImmRange1_31:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,31]");
+ case Match_ImmRange1_32:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,32]");
+ case Match_ImmRange1_64:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,64]");
+ case Match_ImmRange8_8:
+ return Error(ErrorLoc, "immediate operand must be 8.");
+ case Match_ImmRange16_16:
+ return Error(ErrorLoc, "immediate operand must be 16.");
+ case Match_ImmRange32_32:
+ return Error(ErrorLoc, "immediate operand must be 32.");
+ case Match_ImmRange256_65535:
+ return Error(ErrorLoc, "immediate operand must be in the range [256,65535]");
+ case Match_ImmRange0_16777215:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,0xffffff]");
case Match_AlignedMemoryRequiresNone:
case Match_DupAlignedMemoryRequiresNone:
case Match_AlignedMemoryRequires16:
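The largest part of the ARMAsmParser.cpp change above collapses dozens of hand-written isImmN_M() range predicates into the two isImmediate / isImmediateS4 templates. A stand-alone sketch of the same pattern; the real methods additionally unwrap an MCConstantExpr operand before comparing, and the helpers below are hypothetical free-function versions:

    #include <cstdint>

    // Inclusive range check; the bounds become template parameters so every
    // named predicate turns into a one-line alias.
    template <int64_t N, int64_t M> static bool isImmediate(int64_t Value) {
      return Value >= N && Value <= M;
    }

    // Variant for word-aligned immediates (multiples of 4) within [N, M].
    template <int64_t N, int64_t M> static bool isImmediateS4(int64_t Value) {
      return (Value & 3) == 0 && Value >= N && Value <= M;
    }

    // Usage mirroring the rewritten predicates:
    //   isImmediate<1, 16>(V)           - the old isImm1_16()
    //   isImmediateS4<-1020, 1020>(V)   - the old isImm8s4()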
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 0c57a3e3166b..1062c7943201 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,5 +1,6 @@
set(LLVM_TARGET_DEFINITIONS ARM.td)
+tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ac3d8c780af2..e812d32cc76f 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <vector>
using namespace llvm;
@@ -31,6 +34,7 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+
// Handles the condition code status of instructions in IT blocks
class ITStatus
{
@@ -81,9 +85,7 @@ namespace {
private:
std::vector<unsigned char> ITStates;
};
-}
-namespace {
/// ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
@@ -91,7 +93,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() override {}
+ ~ARMDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -106,7 +108,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() override {}
+ ~ThumbDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -118,7 +120,8 @@ private:
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(MCInst&) const;
};
-}
+
+} // end anonymous namespace
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -135,7 +138,6 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
llvm_unreachable("Invalid DecodeStatus!");
}
-
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -319,7 +321,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
@@ -395,8 +396,9 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+
#include "ARMGenDisassemblerTables.inc"
static MCDisassembler *createARMDisassembler(const Target &T,
@@ -416,8 +418,7 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
uint64_t Address, raw_ostream &OS,
raw_ostream &CS,
uint32_t Insn,
- DecodeStatus Result)
-{
+ DecodeStatus Result) {
switch (MI.getOpcode()) {
case ARM::HVC: {
// HVC is undefined if condition = 0xf otherwise unpredictable
@@ -461,65 +462,28 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
}
- // VFP and NEON instructions, similarly, are shared between ARM
- // and Thumb modes.
- Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
-
- Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
-
- Result =
- decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
-
- Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
- this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
-
- Result =
- decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
+ struct DecodeTable {
+ const uint8_t *P;
+ bool DecodePred;
+ };
- Result =
- decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
+ const DecodeTable Tables[] = {
+ {DecoderTableVFP32, false}, {DecoderTableVFPV832, false},
+ {DecoderTableNEONData32, true}, {DecoderTableNEONLoadStore32, true},
+ {DecoderTableNEONDup32, true}, {DecoderTablev8NEON32, false},
+ {DecoderTablev8Crypto32, false},
+ };
- Result =
- decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
+ for (auto Table : Tables) {
+ Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (Table.DecodePred && !DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return Result;
+ }
}
Size = 0;
@@ -527,8 +491,10 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
namespace llvm {
+
extern const MCInstrDesc ARMInsts[];
-}
+
+} // end namespace llvm
/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
/// immediate Value in the MCInst. The immediate Value has had any PC
@@ -859,7 +825,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
-
extern "C" void LLVMInitializeARMDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(),
createARMDisassembler);
@@ -1056,7 +1021,6 @@ static const uint16_t QPRDecoderTable[] = {
ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
};
-
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31 || (RegNo & 1) != 0)
@@ -1676,7 +1640,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt2 == 15)
S = MCDisassembler::SoftFail;
break;
@@ -1693,7 +1657,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRH:
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt == 15)
S = MCDisassembler::SoftFail;
break;
@@ -1711,7 +1675,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRSB:
case ARM::LDRSB_PRE:
case ARM::LDRSB_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt == 15)
S = MCDisassembler::SoftFail;
break;
@@ -2309,7 +2273,6 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3748,7 +3711,6 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4073,7 +4035,7 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder){
+ uint64_t Address, const void *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1)));
@@ -4081,7 +4043,8 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder){
+ uint64_t Address,
+ const void *Decoder) {
// Val is passed in as S:J1:J2:imm10:imm11
// Note no trailing zero after imm11. Also the J1 and J2 values are from
// the encoded instruction. So here change to I1 and I2 values via:
@@ -4247,7 +4210,8 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder){
+ uint64_t Address,
+ const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -4323,7 +4287,6 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4506,7 +4469,6 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4637,7 +4599,6 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4771,7 +4732,6 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -5266,9 +5226,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
-
DecodeStatus S = MCDisassembler::Success;
unsigned CRm = fieldFromInstruction(Val, 0, 4);
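The ARMDisassembler.cpp hunk above replaces seven near-identical decode attempts with a table plus a loop, where each entry records whether the matched (Thumb2-shared) definition needs a fake AL predicate appended. The general shape of that retry loop, with placeholder types standing in for the TableGen'erated decoder API:

    #include <cstddef>

    // Try each candidate decoder in priority order, stop at the first success,
    // and remember whether the matched entry needs the predicate fix-up.
    // Hypothetical stand-alone types; the real loop calls the generated
    // decodeInstruction() on DecoderTable* pointers.
    enum class Status { Fail, Success };

    struct Candidate {
      Status (*Decode)(unsigned Insn); // stands in for one decoder table
      bool NeedsPredicate;             // definition shared with Thumb2?
    };

    static Status tryTables(const Candidate *Tables, size_t N, unsigned Insn,
                            bool &NeedsPredicate) {
      for (size_t I = 0; I != N; ++I) {
        if (Tables[I].Decode(Insn) == Status::Success) {
          NeedsPredicate = Tables[I].NeedsPredicate;
          return Status::Success;
        }
      }
      return Status::Fail;
    }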
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 3667952d44c0..57b91366a085 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -20,7 +20,15 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -73,7 +81,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
unsigned Opcode = MI->getOpcode();
switch (Opcode) {
-
// Check for MOVs and print canonical forms, instead.
case ARM::MOVsr: {
// FIXME: Thumb variants?
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 9d80eed84dc2..86873a3a6ccb 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -235,4 +235,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index a58d5b34131b..40bf545e8322 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -357,13 +357,14 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
}
unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx,
+ bool IsPCRel, MCContext &Ctx,
bool IsLittleEndian,
bool IsResolved) const {
unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
- llvm_unreachable("Unknown fixup kind!");
+ Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
+ return 0;
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -412,8 +413,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
isAdd = false;
}
- if (Ctx && Value >= 4096) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 4096) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -433,8 +434,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
opc = 2; // 0b0010
}
- if (Ctx && ARM_AM::getSOImmVal(Value) == -1) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (ARM_AM::getSOImmVal(Value) == -1) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
// Encode the immediate and shift the opcode into place.
@@ -541,8 +542,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
//
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
- if (Ctx && Value % 4 != 0) {
- Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination");
+ if (Value % 4 != 0) {
+ Ctx.reportError(Fixup.getLoc(), "misaligned ARM call destination");
return 0;
}
@@ -568,10 +569,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_thumb_cp:
// On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
// could have an error on our hands.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
@@ -581,8 +582,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// CB instructions can only branch to offsets in [4, 126] in multiples of 2
// so ensure that the raw value LSB is zero and it lies in [2, 130].
// An offset of 2 will be relaxed to a NOP.
- if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if ((int64_t)Value < 2 || Value > 0x82 || Value & 1) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
// Offset by 4 and don't encode the lower bit, which is always 0.
@@ -592,21 +593,21 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] &&
- !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
+ !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
@@ -620,8 +621,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
@@ -641,8 +642,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -667,13 +668,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// These values don't encode the low bit since it's always zero.
- if (Ctx && (Value & 1)) {
- Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup");
+ if (Value & 1) {
+ Ctx.reportError(Fixup.getLoc(), "invalid value for this fixup");
return 0;
}
Value >>= 1;
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -687,8 +688,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
case ARM::fixup_arm_mod_imm:
Value = ARM_AM::getSOImmVal(Value);
- if (Ctx && Value >> 12) {
- Ctx->reportError(Fixup.getLoc(), "out of range immediate fixup value");
+ if (Value >> 12) {
+ Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value");
return 0;
}
return Value;
@@ -737,12 +738,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
(unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
IsResolved = false;
-
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value aren't invalid.
- (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
- IsLittleEndian, IsResolved);
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -846,11 +841,10 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
}
void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ unsigned DataSize, uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value =
- adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true);
+ Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true);
if (!Value)
return; // Doesn't change encoding.
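The ARMAsmBackend change above threads MCContext by reference through adjustFixupValue() instead of an optional pointer, so range checks always emit a located diagnostic and an unknown fixup kind reports an error rather than hitting llvm_unreachable. The recurring shape of those checks, reduced to one illustrative helper that is not part of the backend:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCFixup.h"
    #include <cstdint>

    using namespace llvm;

    // Before this change the equivalent code read `if (Ctx && Value >= 4096)`
    // on a nullable MCContext*, so a null context silently dropped the
    // diagnostic; taking a reference makes the report unconditional.
    static unsigned checkPCRelRange(MCContext &Ctx, const MCFixup &Fixup,
                                    uint64_t Value) {
      if (Value >= 4096) {
        Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
        return 0;
      }
      return static_cast<unsigned>(Value);
    }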
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 84caaacc47d3..2ddedb5d6105 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -46,11 +46,11 @@ public:
bool &IsResolved) override;
unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
- MCContext *Ctx, bool IsLittleEndian,
+ MCContext &Ctx, bool IsLittleEndian,
bool IsResolved) const;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 088b4205ed62..92e553f21f14 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -291,7 +291,11 @@ namespace ARMII {
/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
- MO_OPTION_MASK = 0x1f,
+ MO_OPTION_MASK = 0x0f,
+
+ /// MO_SBREL - On a symbol operand, this represents a static base relative
+ /// relocation. Used in movw and movt instructions.
+ MO_SBREL = 0x10,
/// MO_DLLIMPORT - On a symbol operand, this represents that the reference
/// to the symbol is for an import stub. This is used for DLL import
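The ARMBaseInfo.h hunk above shrinks the mutually exclusive option field to the low four bits so that bit 4 can carry the new MO_SBREL marker next to it. A small sketch of how such a flag word splits apart; the accessor names are illustrative and not part of ARMII:

    // Values mirror the hunk: the mutually exclusive options live in bits
    // [3:0] and MO_SBREL occupies bit 4, independent of them.
    enum : unsigned {
      MO_OPTION_MASK = 0x0f,
      MO_SBREL = 0x10,
    };

    static inline unsigned getOptionFlags(unsigned TargetFlags) {
      return TargetFlags & MO_OPTION_MASK;
    }

    static inline bool isSBRel(unsigned TargetFlags) {
      return (TargetFlags & MO_SBREL) != 0;
    }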
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 6f19754b899e..e1fa24571820 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -7,32 +7,32 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMFixupKinds.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
using namespace llvm;
namespace {
+
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
enum { DefaultEABIVersion = 0x05000000U };
- unsigned GetRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
+ unsigned GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, MCContext &Ctx) const;
public:
ARMELFObjectWriter(uint8_t OSABI);
- ~ARMELFObjectWriter() override;
+ ~ARMELFObjectWriter() override = default;
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
@@ -40,15 +40,14 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
};
-}
+
+} // end anonymous namespace
ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
-ARMELFObjectWriter::~ARMELFObjectWriter() {}
-
bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const {
// FIXME: This is extremely conservative. This really needs to use a
@@ -70,19 +69,20 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- return GetRelocTypeInner(Target, Fixup, IsPCRel);
+ return GetRelocTypeInner(Target, Fixup, IsPCRel, Ctx);
}
unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
- bool IsPCRel) const {
+ bool IsPCRel,
+ MCContext &Ctx) const {
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
unsigned Type = 0;
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
default:
- report_fatal_error("unsupported relocation on symbol");
+ Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
@@ -161,7 +161,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default:
- report_fatal_error("unsupported relocation on symbol");
+ Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
@@ -270,10 +270,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
break;
case ARM::fixup_t2_movt_hi16:
- Type = ELF::R_ARM_THM_MOVT_ABS;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_THM_MOVT_BREL;
+ break;
+ }
break;
case ARM::fixup_t2_movw_lo16:
- Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_THM_MOVW_BREL_NC;
+ break;
+ }
break;
}
}
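The ARMELFObjectWriter hunk above makes the Thumb2 movw/movt fixups honour the symbol modifier, selecting the static-base-relative relocations for SBREL references instead of the absolute ones. The movt mapping condensed into one hypothetical helper (movw uses the analogous _MOVW_ pair); this is not the writer's actual interface:

    #include "llvm/MC/MCExpr.h"
    #include "llvm/Support/ELF.h"
    #include "llvm/Support/ErrorHandling.h"

    using namespace llvm;

    // Map the access modifier on a Thumb2 movt fixup to its ELF relocation.
    static unsigned relocForThumbMovtHi16(MCSymbolRefExpr::VariantKind Modifier) {
      switch (Modifier) {
      case MCSymbolRefExpr::VK_None:
        return ELF::R_ARM_THM_MOVT_ABS;
      case MCSymbolRefExpr::VK_ARM_SBREL:
        return ELF::R_ARM_THM_MOVT_BREL;
      default:
        llvm_unreachable("Unsupported Modifier");
      }
    }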
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index f6bb35d2326b..6fa890ba1cd5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -15,7 +15,11 @@
#include "ARMRegisterInfo.h"
#include "ARMUnwindOpAsm.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -24,25 +28,33 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetParser.h"
#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <string>
using namespace llvm;
@@ -101,16 +113,21 @@ ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S,
bool VerboseAsm)
: ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter),
IsVerboseAsm(VerboseAsm) {}
+
void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; }
void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; }
void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; }
+
void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) {
OS << "\t.personality " << Personality->getName() << '\n';
}
+
void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) {
OS << "\t.personalityindex " << Index << '\n';
}
+
void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; }
+
void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
int64_t Offset) {
OS << "\t.setfp\t";
@@ -121,6 +138,7 @@ void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
OS << ", #" << Offset;
OS << '\n';
}
+
void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
assert((Reg != ARM::SP && Reg != ARM::PC) &&
"the operand of .movsp cannot be either sp or pc");
@@ -131,9 +149,11 @@ void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
OS << ", #" << Offset;
OS << '\n';
}
+
void ARMTargetAsmStreamer::emitPad(int64_t Offset) {
OS << "\t.pad\t#" << Offset << '\n';
}
+
void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector) {
assert(RegList.size() && "RegList should not be empty");
@@ -151,8 +171,9 @@ void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
OS << "}\n";
}
-void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {
-}
+
+void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {}
+
void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value);
if (IsVerboseAsm) {
@@ -162,6 +183,7 @@ void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
StringRef String) {
switch (Attribute) {
@@ -179,6 +201,7 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {
@@ -194,20 +217,25 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
}
+
void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
}
+
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
}
+
void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
}
-void ARMTargetAsmStreamer::finishAttributeSection() {
-}
+
+void ARMTargetAsmStreamer::finishAttributeSection() {}
+
void
ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
OS << "\t.tlsdescseq\t" << S->getSymbol().getName();
@@ -274,12 +302,12 @@ private:
};
StringRef CurrentVendor;
- unsigned FPU;
- unsigned Arch;
- unsigned EmittedArch;
+ unsigned FPU = ARM::FK_INVALID;
+ unsigned Arch = ARM::AK_INVALID;
+ unsigned EmittedArch = ARM::AK_INVALID;
SmallVector<AttributeItem, 64> Contents;
- MCSection *AttributeSection;
+ MCSection *AttributeSection = nullptr;
AttributeItem *getAttributeItem(unsigned Attribute) {
for (size_t i = 0; i < Contents.size(); ++i)
@@ -393,9 +421,7 @@ private:
public:
ARMTargetELFStreamer(MCStreamer &S)
- : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
- Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID),
- AttributeSection(nullptr) {}
+ : ARMTargetStreamer(S), CurrentVendor("aeabi") {}
};
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -416,12 +442,11 @@ public:
ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
- : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
- MappingSymbolCounter(0), LastEMS(EMS_None) {
+ : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) {
EHReset();
}
- ~ARMELFStreamer() {}
+ ~ARMELFStreamer() override = default;
void FinishImpl() override;
@@ -439,20 +464,21 @@ public:
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
- // We have to keep track of the mapping symbol state of any sections we
- // use. Each one should start off as EMS_None, which is provided as the
- // default constructor by DenseMap::lookup.
- LastMappingSymbols[getPreviousSection().first] = LastEMS;
- LastEMS = LastMappingSymbols.lookup(Section);
-
+ LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo);
MCELFStreamer::ChangeSection(Section, Subsection);
+ auto LastMappingSymbol = LastMappingSymbols.find(Section);
+ if (LastMappingSymbol != LastMappingSymbols.end()) {
+ LastEMSInfo = std::move(LastMappingSymbol->second);
+ return;
+ }
+ LastEMSInfo.reset(new ElfMappingSymbolInfo(SMLoc(), nullptr, 0));
}
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst& Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
if (IsThumb)
EmitThumbMappingSymbol();
else
@@ -507,15 +533,25 @@ public:
MCELFStreamer::EmitBytes(Data);
}
+ void FlushPendingMappingSymbol() {
+ if (!LastEMSInfo->hasInfo())
+ return;
+ ElfMappingSymbolInfo *EMS = LastEMSInfo.get();
+ EmitMappingSymbol("$d", EMS->Loc, EMS->F, EMS->Offset);
+ EMS->resetInfo();
+ }
+
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
- if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
+ if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) {
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
getContext().reportError(Loc, "relocated expression must be 32-bit");
return;
}
+ getOrCreateDataFragment();
+ }
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
@@ -548,22 +584,54 @@ private:
EMS_Data
};
+ struct ElfMappingSymbolInfo {
+ explicit ElfMappingSymbolInfo(SMLoc Loc, MCFragment *F, uint64_t O)
+ : Loc(Loc), F(F), Offset(O), State(EMS_None) {}
+ void resetInfo() {
+ F = nullptr;
+ Offset = 0;
+ }
+ bool hasInfo() { return F != nullptr; }
+ SMLoc Loc;
+ MCFragment *F;
+ uint64_t Offset;
+ ElfMappingSymbol State;
+ };
+
void EmitDataMappingSymbol() {
- if (LastEMS == EMS_Data) return;
+ if (LastEMSInfo->State == EMS_Data)
+ return;
+ else if (LastEMSInfo->State == EMS_None) {
+      // This is a tentative symbol; it won't really be emitted until it's
+      // actually needed.
+ ElfMappingSymbolInfo *EMS = LastEMSInfo.get();
+ auto *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!DF)
+ return;
+ EMS->Loc = SMLoc();
+ EMS->F = getCurrentFragment();
+ EMS->Offset = DF->getContents().size();
+ LastEMSInfo->State = EMS_Data;
+ return;
+ }
EmitMappingSymbol("$d");
- LastEMS = EMS_Data;
+ LastEMSInfo->State = EMS_Data;
}
void EmitThumbMappingSymbol() {
- if (LastEMS == EMS_Thumb) return;
+ if (LastEMSInfo->State == EMS_Thumb)
+ return;
+ FlushPendingMappingSymbol();
EmitMappingSymbol("$t");
- LastEMS = EMS_Thumb;
+ LastEMSInfo->State = EMS_Thumb;
}
void EmitARMMappingSymbol() {
- if (LastEMS == EMS_ARM) return;
+ if (LastEMSInfo->State == EMS_ARM)
+ return;
+ FlushPendingMappingSymbol();
EmitMappingSymbol("$a");
- LastEMS = EMS_ARM;
+ LastEMSInfo->State = EMS_ARM;
}
void EmitMappingSymbol(StringRef Name) {
@@ -576,6 +644,17 @@ private:
Symbol->setExternal(false);
}
+ void EmitMappingSymbol(StringRef Name, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) {
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
+ EmitLabel(Symbol, Loc, F);
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
+ Symbol->setOffset(Offset);
+ }
+
void EmitThumbFunc(MCSymbol *Func) override {
getAssembler().setIsThumbFunc(Func);
EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
@@ -599,10 +678,12 @@ private:
void EmitFixup(const MCExpr *Expr, MCFixupKind Kind);
bool IsThumb;
- int64_t MappingSymbolCounter;
+ int64_t MappingSymbolCounter = 0;
+
+ DenseMap<const MCSection *, std::unique_ptr<ElfMappingSymbolInfo>>
+ LastMappingSymbols;
- DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
- ElfMappingSymbol LastEMS;
+ std::unique_ptr<ElfMappingSymbolInfo> LastEMSInfo;
// ARM Exception Handling Frame Information
MCSymbol *ExTab;
@@ -618,6 +699,7 @@ private:
SmallVector<uint8_t, 64> Opcodes;
UnwindOpcodeAssembler UnwindOpAsm;
};
+
} // end anonymous namespace
ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
@@ -627,33 +709,42 @@ ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); }
void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); }
void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); }
+
void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) {
getStreamer().emitPersonality(Personality);
}
+
void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) {
getStreamer().emitPersonalityIndex(Index);
}
+
void ARMTargetELFStreamer::emitHandlerData() {
getStreamer().emitHandlerData();
}
+
void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
int64_t Offset) {
getStreamer().emitSetFP(FpReg, SpReg, Offset);
}
+
void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
getStreamer().emitMovSP(Reg, Offset);
}
+
void ARMTargetELFStreamer::emitPad(int64_t Offset) {
getStreamer().emitPad(Offset);
}
+
void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector) {
getStreamer().emitRegSave(RegList, isVector);
}
+
void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset,
const SmallVectorImpl<uint8_t> &Opcodes) {
getStreamer().emitUnwindRaw(Offset, Opcodes);
}
+
void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
assert(!Vendor.empty() && "Vendor cannot be empty.");
@@ -668,25 +759,31 @@ void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
CurrentVendor = Vendor;
}
+
void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
StringRef Value) {
setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {
setAttributeItems(Attribute, IntValue, StringValue,
/* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitArch(unsigned Value) {
Arch = Value;
}
+
void ARMTargetELFStreamer::emitObjectArch(unsigned Value) {
EmittedArch = Value;
}
+
void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
@@ -786,9 +883,11 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
}
}
+
void ARMTargetELFStreamer::emitFPU(unsigned Value) {
FPU = Value;
}
+
void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
switch (FPU) {
case ARM::FK_VFP:
@@ -920,6 +1019,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
break;
}
}
+
size_t ARMTargetELFStreamer::calculateContentSize() const {
size_t Result = 0;
for (size_t i = 0; i < Contents.size(); ++i) {
@@ -944,6 +1044,7 @@ size_t ARMTargetELFStreamer::calculateContentSize() const {
}
return Result;
}
+
void ARMTargetELFStreamer::finishAttributeSection() {
// <format-version>
// [ <section-length> "vendor-name"
@@ -1093,9 +1194,9 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
const MCSymbolELF *Group = FnSection.getGroup();
if (Group)
Flags |= ELF::SHF_GROUP;
- MCSectionELF *EHSection =
- getContext().getELFSection(EHSecName, Type, Flags, 0, Group,
- FnSection.getUniqueID(), nullptr, &FnSection);
+ MCSectionELF *EHSection = getContext().getELFSection(
+ EHSecName, Type, Flags, 0, Group, FnSection.getUniqueID(),
+ static_cast<const MCSymbolELF *>(&Fn));
assert(EHSection && "Failed to get the required EH section");
@@ -1114,6 +1215,7 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
SectionKind::getData(), FnStart);
}
+
void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
MCDataFragment *Frag = getOrCreateDataFragment();
Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr,
@@ -1396,8 +1498,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
- }
-
}
-
+} // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 559a4f8de75f..d9df2c6da7ec 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -11,22 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
using namespace llvm;
@@ -36,9 +47,8 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
namespace {
+
class ARMMCCodeEmitter : public MCCodeEmitter {
- ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
- void operator=(const ARMMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -47,15 +57,18 @@ public:
ARMMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool IsLittle)
: MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) {
}
-
- ~ARMMCCodeEmitter() override {}
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
+ ARMMCCodeEmitter &operator=(const ARMMCCodeEmitter &) = delete;
+ ~ARMMCCodeEmitter() override = default;
bool isThumb(const MCSubtargetInfo &STI) const {
return STI.getFeatureBits()[ARM::ModeThumb];
}
+
bool isThumb2(const MCSubtargetInfo &STI) const {
return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2];
}
+
bool isTargetMachO(const MCSubtargetInfo &STI) const {
const Triple &TT = STI.getTargetTriple();
return TT.isOSBinFormatMachO();
@@ -200,6 +213,7 @@ public:
case ARM_AM::ib: return 3;
}
}
+
/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
///
unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
@@ -273,7 +287,6 @@ public:
unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(Op);
// We expect MO to be an immediate or an expression,
@@ -432,18 +445,6 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new ARMMCCodeEmitter(MCII, Ctx, true);
-}
-
-MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new ARMMCCodeEmitter(MCII, Ctx, false);
-}
-
/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
/// instructions, and rewrite them to their Thumb2 form if we are currently in
/// Thumb2 mode.
@@ -550,7 +551,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
bool ARMMCCodeEmitter::
EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
@@ -1515,7 +1516,7 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
uint32_t v = ~MO.getImm();
uint32_t lsb = countTrailingZeros(v);
uint32_t msb = (32 - countLeadingZeros (v)) - 1;
- assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ assert(v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
return lsb | (msb << 5);
}
@@ -1700,3 +1701,15 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
#include "ARMGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, true);
+}
+
+MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, false);
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 9e4d202321e6..477755157040 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -260,18 +260,37 @@ public:
return false;
int64_t Imm = Inst.getOperand(0).getImm();
- // FIXME: This is not right for thumb.
Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
return true;
}
};
+class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis {
+public:
+ ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const override {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return false;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes.
+ return true;
+ }
+};
+
}
static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
return new ARMMCInstrAnalysis(Info);
}
+static MCInstrAnalysis *createThumbMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ThumbMCInstrAnalysis(Info);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(),
@@ -289,9 +308,6 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCSubtargetInfo(*T,
ARM_MC::createARMMCSubtargetInfo);
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
-
TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
@@ -313,6 +329,12 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
}
+ // Register the MC instruction analyzer.
+ for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget()})
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+ for (Target *T : {&getTheThumbLETarget(), &getTheThumbBETarget()})
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createThumbMCInstrAnalysis);
+
// Register the MC Code Emitter
for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 482bcf902518..34c770440e1b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMachORelocationInfo.cpp ----------------------------------------===//
+//===- ARMMachORelocationInfo.cpp -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "ARMMCExpr.h"
-#include "llvm-c/Disassembler.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm-c/Disassembler.h"
using namespace llvm;
-using namespace object;
namespace {
+
class ARMMachORelocationInfo : public MCRelocationInfo {
public:
ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
@@ -35,7 +35,8 @@ public:
}
}
};
-} // End unnamed namespace
+
+} // end anonymous namespace
/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo.
MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index c0d10c896354..73e563890dd9 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -10,20 +10,21 @@
// This file implements the ARMTargetStreamer class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
+
#include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
+
//
// ARMTargetStreamer Implementation
//
+
ARMTargetStreamer::ARMTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
-ARMTargetStreamer::~ARMTargetStreamer() {}
+ARMTargetStreamer::~ARMTargetStreamer() = default;
// The constant pool handling is shared by all ARMTargetStreamer
// implementations.
@@ -73,5 +74,4 @@ void ARMTargetStreamer::finishAttributeSection() {}
void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
-
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 173cc93d44fb..d3ab83bbccbc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -14,12 +14,14 @@
#include "ARMUnwindOpAsm.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
using namespace llvm;
namespace {
+
/// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes
/// with MSB to LSB per uint32_t ordering. For example, the first byte will
/// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0,
@@ -27,20 +29,19 @@ namespace {
class UnwindOpcodeStreamer {
private:
SmallVectorImpl<uint8_t> &Vec;
- size_t Pos;
+ size_t Pos = 3;
public:
- UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) {
- }
+ UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V) {}
/// Emit the byte in MSB to LSB per uint32_t order.
- inline void EmitByte(uint8_t elem) {
+ void EmitByte(uint8_t elem) {
Vec[Pos] = elem;
Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u);
}
/// Emit the size prefix.
- inline void EmitSize(size_t Size) {
+ void EmitSize(size_t Size) {
size_t SizeInWords = (Size + 3) / 4;
assert(SizeInWords <= 0x100u &&
"Only 256 additional words are allowed for unwind opcodes");
@@ -48,19 +49,20 @@ namespace {
}
/// Emit the personality index prefix.
- inline void EmitPersonalityIndex(unsigned PI) {
+ void EmitPersonalityIndex(unsigned PI) {
assert(PI < ARM::EHABI::NUM_PERSONALITY_INDEX &&
"Invalid personality prefix");
EmitByte(ARM::EHABI::EHT_COMPACT | PI);
}
/// Fill the rest of bytes with FINISH opcode.
- inline void FillFinishOpcode() {
+ void FillFinishOpcode() {
while (Pos < Vec.size())
EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
}
};
-}
+
+} // end anonymous namespace
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
if (RegSave == 0u)
@@ -153,7 +155,6 @@ void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex,
SmallVectorImpl<uint8_t> &Result) {
-
UnwindOpcodeStreamer OpStreamer(Result);
if (HasPersonality) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index e0c113ecfaa3..a7bfbdf4938e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -16,8 +16,8 @@
#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/DataTypes.h"
+#include <cstddef>
+#include <cstdint>
namespace llvm {
@@ -25,13 +25,12 @@ class MCSymbol;
class UnwindOpcodeAssembler {
private:
- llvm::SmallVector<uint8_t, 32> Ops;
- llvm::SmallVector<unsigned, 8> OpBegins;
- bool HasPersonality;
+ SmallVector<uint8_t, 32> Ops;
+ SmallVector<unsigned, 8> OpBegins;
+ bool HasPersonality = false;
public:
- UnwindOpcodeAssembler()
- : HasPersonality(0) {
+ UnwindOpcodeAssembler() {
OpBegins.push_back(0);
}
@@ -40,12 +39,12 @@ public:
Ops.clear();
OpBegins.clear();
OpBegins.push_back(0);
- HasPersonality = 0;
+ HasPersonality = false;
}
/// Set the personality
void setPersonality(const MCSymbol *Per) {
- HasPersonality = 1;
+ HasPersonality = true;
}
/// Emit unwind opcodes for .save directives
@@ -88,6 +87,6 @@ private:
}
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 166c04b41a77..7ae2f864d79d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -10,23 +10,28 @@
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/COFF.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
namespace {
+
class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
public:
ARMWinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
assert(!Is64Bit && "AArch64 support not yet implemented");
}
- ~ARMWinCOFFObjectWriter() override {}
+
+ ~ARMWinCOFFObjectWriter() override = default;
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsCrossSection,
@@ -35,6 +40,8 @@ public:
bool recordRelocation(const MCFixup &) const override;
};
+} // end anonymous namespace
+
unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
@@ -79,13 +86,13 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
}
-}
namespace llvm {
+
MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
}
-}
+} // end namespace llvm
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 9953c61cd89c..fc083b98395b 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -11,14 +11,36 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "ThumbRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <vector>
using namespace llvm;
@@ -238,9 +260,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
FramePtrOffsetInBlock +=
MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
- .setMIFlags(MachineInstr::FrameSetup));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addReg(ARM::SP)
+ .addImm(FramePtrOffsetInBlock / 4)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
if(FramePtrOffsetInBlock) {
CFAOffset += FramePtrOffsetInBlock;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
@@ -336,14 +360,19 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// will be allocated after this, so we can still use the base pointer
// to reference locals.
if (RegInfo->hasBasePointer(MF))
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
- .addReg(ARM::SP));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
if (MFI.hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
+
+ // In some cases, virtual registers have been introduced, e.g. by uses of
+ // emitThumbRegPlusImmInReg.
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}
static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
@@ -408,13 +437,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(ARM::R4));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
} else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(FramePtr));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL));
} else {
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
@@ -493,12 +522,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
return true;
MachineInstrBuilder MIB =
- AddDefaultPred(
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
+ .add(predOps(ARMCC::AL));
// Copy implicit ops and popped registers, if any.
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB.addReg(ARM::PC, RegState::Define);
// Erase the old instruction (tBX_RET or tPOP).
MBB.erase(MBBI);
@@ -566,22 +595,23 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
if (TemporaryReg) {
assert(!PopReg && "Unnecessary MOV is about to be inserted");
PopReg = PopFriendly.find_first();
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(TemporaryReg, RegState::Define)
- .addReg(PopReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
// We couldn't use the direct restoration above, so
// perform the opposite conversion: tPOP_RET to tPOP.
MachineInstrBuilder MIB =
- AddDefaultPred(
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL));
bool Popped = false;
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
MO.getReg() != ARM::PC) {
- MIB.addOperand(MO);
+ MIB.add(MO);
if (!MO.isImplicit())
Popped = true;
}
@@ -590,23 +620,27 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
MBB.erase(MIB.getInstr());
// Erase the old instruction.
MBB.erase(MBBI);
- MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
+ MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
+ .add(predOps(ARMCC::AL));
}
assert(PopReg && "Do not know how to get LR");
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
.addReg(PopReg, RegState::Define);
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(PopReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
if (TemporaryReg)
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(PopReg, RegState::Define)
- .addReg(TemporaryReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
return true;
}
@@ -667,8 +701,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Push the low registers and lr
if (!LoRegsToSave.empty()) {
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
if (LoRegsToSave.count(Reg)) {
bool isKill = !MF.getRegInfo().isLiveIn(Reg);
@@ -708,8 +742,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
- MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(PushMIB);
+ MachineInstrBuilder PushMIB =
+ BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
SmallVector<unsigned, 4> RegsToPush;
while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
@@ -719,11 +753,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MBB.addLiveIn(*HiRegToSave);
// Emit a MOV from the high reg to the low reg.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
- MIB.addReg(*CopyReg, RegState::Define);
- MIB.addReg(*HiRegToSave, getKillRegState(isKill));
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(*CopyReg, RegState::Define)
+ .addReg(*HiRegToSave, getKillRegState(isKill))
+ .add(predOps(ARMCC::AL));
// Record the register that must be added to the PUSH.
RegsToPush.push_back(*CopyReg);
@@ -735,7 +768,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
}
// Add the low registers to the PUSH, in ascending order.
- for (unsigned Reg : reverse(RegsToPush))
+ for (unsigned Reg : llvm::reverse(RegsToPush))
PushMIB.addReg(Reg, RegState::Kill);
// Insert the PUSH instruction after the MOVs.
@@ -817,19 +850,18 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
// Create the POP instruction.
- MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP));
- AddDefaultPred(PopMIB);
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
// Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define);
// Create the MOV from low to high register.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
- MIB.addReg(*HiRegToRestore, RegState::Define);
- MIB.addReg(*CopyReg, RegState::Kill);
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(*HiRegToRestore, RegState::Define)
+ .addReg(*CopyReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
HiRegToRestore =
@@ -837,11 +869,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
}
-
-
-
- MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -859,6 +888,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// ARMv4T requires BX, see emitEpilogue
if (!STI.hasV5TOps())
continue;
+ // Tailcall optimization failed; change TCRETURN to a tBL
+ if (MI->getOpcode() == ARM::TCRETURNdi ||
+ MI->getOpcode() == ARM::TCRETURNri) {
+ unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi
+ ? ARM::tBL : ARM::tBLXr;
+ MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode));
+ BL.add(predOps(ARMCC::AL));
+ BL.add(MI->getOperand(0));
+ MBB.insert(MI, &*BL);
+ }
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
if (MI != MBB.end())
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 4b4fbaab28d9..27bff4d75acf 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -50,20 +50,29 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
|| !ARM::tGPRRegClass.contains(DestReg))
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
else {
- // FIXME: The performance consequences of this are going to be atrocious.
- // Some things to try that should be better:
- // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
- // * 'movs $dst, $src' if cpsr isn't live
- // See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html
+ // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
+ // with hi as either r10 or r11.
+
+ const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
+ if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
+ == MachineBasicBlock::LQR_Dead) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ ->addRegisterDead(ARM::CPSR, RegInfo);
+ return;
+ }
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
- .addReg(SrcReg, getKillRegState(KillSrc));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
- .addReg(DestReg, getDefRegState(true));
+ BuildMI(MBB, I, DL, get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(DestReg, getDefRegState(true));
}
}
@@ -87,9 +96,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
}
@@ -113,8 +125,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d01fc8c40ddf..04bdd91b53e6 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -9,13 +9,26 @@
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <cassert>
+#include <new>
+
using namespace llvm;
#define DEBUG_TYPE "thumb2-it"
@@ -24,16 +37,18 @@ STATISTIC(NumITs, "Number of IT blocks inserted");
STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
+
class Thumb2ITBlockPass : public MachineFunctionPass {
public:
static char ID;
- Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
bool restrictIT;
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -52,8 +67,10 @@ namespace {
SmallSet<unsigned, 4> &Uses);
bool InsertITInstructions(MachineBasicBlock &MBB);
};
+
char Thumb2ITBlockPass::ID = 0;
-}
+
+} // end anonymous namespace
/// TrackDefUses - Tracking what registers are being defined and used by
/// instructions in the IT block. This also tracks "dependencies", i.e. uses
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 1c731d669eda..818ba85c7d40 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -117,8 +117,9 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
}
void Thumb2InstrInfo::
@@ -138,9 +139,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
return;
}
@@ -156,8 +160,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
- MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
return;
}
@@ -180,8 +183,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
return;
}
@@ -198,8 +204,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
- MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
@@ -259,10 +264,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (Fits) {
if (isSub) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
- .addReg(BaseReg)
- .addReg(DestReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
} else {
// Here we know that DestReg is not SP but we do not
// know anything about BaseReg. t2ADDrr is an invalid
@@ -270,10 +276,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
// is fine if SP is the first argument. To be sure we
// do not generate invalid encoding, put BaseReg first.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
- .addReg(BaseReg)
- .addReg(DestReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
}
return;
}
@@ -284,8 +291,10 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned Opc = 0;
if (DestReg == ARM::SP && BaseReg != ARM::SP) {
// mov sp, rn. Note t2MOVr cannot be used.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
- .addReg(BaseReg).setMIFlags(MIFlags));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
BaseReg = ARM::SP;
continue;
}
@@ -296,8 +305,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg)
+ .addImm(ThisVal / 4)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
NumBytes = 0;
continue;
}
@@ -334,12 +346,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
}
// Build the new ADD / SUB.
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill)
- .addImm(ThisVal)).setMIFlags(MIFlags);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
if (HasCCOut)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
BaseReg = DestReg;
}
@@ -474,7 +487,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
do MI.RemoveOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
return true;
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8208e7e24770..c90475c28db7 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -10,20 +10,38 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h" // To access Function attributes
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "t2-reduce-size"
@@ -40,6 +58,7 @@ static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
cl::init(-1), cl::Hidden);
namespace {
+
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
@@ -139,11 +158,12 @@ namespace {
class Thumb2SizeReduce : public MachineFunctionPass {
public:
static char ID;
- Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
const Thumb2InstrInfo *TII;
const ARMSubtarget *STI;
+ Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
+
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -201,19 +221,21 @@ namespace {
struct MBBInfo {
// The flags leaving this block have high latency.
- bool HighLatencyCPSR;
+ bool HighLatencyCPSR = false;
// Has this block been visited yet?
- bool Visited;
+ bool Visited = false;
- MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ MBBInfo() = default;
};
SmallVector<MBBInfo, 8> BlockInfo;
std::function<bool(const Function &)> PredicateFtor;
};
+
char Thumb2SizeReduce::ID = 0;
-}
+
+} // end anonymous namespace
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
: MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
@@ -490,14 +512,13 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
- case ARM::t2STMIA: {
+ case ARM::t2STMIA:
// If the base register is killed, we don't care what its value is after the
// instruction, so we can use an updating STMIA.
if (!MI->getOperand(0).isKill())
return false;
break;
- }
case ARM::t2LDMIA_RET: {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
@@ -562,8 +583,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
if (!isLdStMul) {
- MIB.addOperand(MI->getOperand(0));
- MIB.addOperand(MI->getOperand(1));
+ MIB.add(MI->getOperand(0));
+ MIB.add(MI->getOperand(1));
if (HasImmOffset)
MIB.addImm(OffsetImm / Scale);
@@ -577,7 +598,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer the rest of operands.
for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.addOperand(MI->getOperand(OpNum));
+ MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -621,12 +642,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(ARM::tADDrSPi))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
+ .add(predOps(ARMCC::AL));
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -652,11 +674,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
switch (Opc) {
default: break;
- case ARM::t2ADDSri: {
+ case ARM::t2ADDSri:
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
LLVM_FALLTHROUGH;
- }
case ARM::t2ADDSrr:
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
@@ -698,7 +719,6 @@ bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
bool LiveCPSR, bool IsSelfLoop) {
-
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
@@ -785,13 +805,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.addOperand(MI->getOperand(0));
- if (NewMCID.hasOptionalDef()) {
- if (HasCC)
- AddDefaultT1CC(MIB, CCDead);
- else
- AddNoT1CC(MIB);
- }
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
@@ -800,7 +816,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
continue;
if (SkipPred && MCID.OpInfo[i].isPredicate())
continue;
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
}
// Transfer MI flags.
@@ -880,13 +896,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.addOperand(MI->getOperand(0));
- if (NewMCID.hasOptionalDef()) {
- if (HasCC)
- AddDefaultT1CC(MIB, CCDead);
- else
- AddNoT1CC(MIB);
- }
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
@@ -909,10 +921,10 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Skip implicit def of CPSR. Either it's modeled as an optional
// def now or it's already an implicit def on the new instruction.
continue;
- MIB.addOperand(MO);
+ MIB.add(MO);
}
if (!MCID.isPredicable() && NewMCID.isPredicable())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 2efd63b84a2c..15a567523336 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -93,9 +93,10 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
}
/// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -145,14 +146,17 @@ static void emitThumbRegPlusImmInReg(
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)
+ .add(t1CondCodeOp())
.addImm(NumBytes)
.setMIFlags(MIFlags);
} else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)
+ .add(t1CondCodeOp())
.addImm(NumBytes)
.setMIFlags(MIFlags);
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)
+ .add(t1CondCodeOp())
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
} else if (ST.genExecuteOnly()) {
@@ -167,12 +171,12 @@ static void emitThumbRegPlusImmInReg(
: ((isHigh || !CanChangeCC) ? ARM::tADDhirr : ARM::tADDrr);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (Opc != ARM::tADDhirr)
- MIB = AddDefaultT1CC(MIB);
+ MIB = MIB.add(t1CondCodeOp());
if (DestReg == ARM::SP || isSub)
MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
else
MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
@@ -307,12 +311,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
if (CopyNeedsCC)
- MIB = AddDefaultT1CC(MIB);
+ MIB = MIB.add(t1CondCodeOp());
MIB.addReg(BaseReg, RegState::Kill);
if (CopyOpc != ARM::tMOVr) {
MIB.addImm(CopyImm);
}
- AddDefaultPred(MIB.setMIFlags(MIFlags));
+ MIB.setMIFlags(MIFlags).add(predOps(ARMCC::AL));
BaseReg = DestReg;
}
@@ -324,10 +328,11 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
if (ExtraNeedsCC)
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(BaseReg).addImm(ExtraImm);
- MIB = AddDefaultPred(MIB);
- MIB.setMIFlags(MIFlags);
+ MIB = MIB.add(t1CondCodeOp());
+ MIB.addReg(BaseReg)
+ .addImm(ExtraImm)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
}
}
@@ -460,9 +465,10 @@ bool ThumbRegisterInfo::saveScavengerRegister(
// a call clobbered register that we know won't be used in Thumb1 mode.
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL;
- AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(Reg, RegState::Kill));
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
// The UseMI is where we would like to restore the register. If there's
// interference with R12 before then, however, we'll need to restore it
@@ -490,8 +496,10 @@ bool ThumbRegisterInfo::saveScavengerRegister(
}
}
// Restore the register from R12
- AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr))
+ .addReg(Reg, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill)
+ .add(predOps(ARMCC::AL));
return true;
}
@@ -621,5 +629,5 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Add predicate back if it's needed.
if (MI.isPredicable())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index 4afdd3a0ec08..50bb50b44f27 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -130,7 +130,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
}
}
- printOperand(MI, OpNum, O);
+ if (Error)
+ printOperand(MI, OpNum, O);
return false;
}
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 1b2f2cec0bca..13080a5d72f0 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -509,8 +509,8 @@ bool AVRExpandPseudo::expand<AVR::LDIWRdK>(Block &MBB, BlockIt MBBI) {
const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
unsigned TF = MI.getOperand(1).getTargetFlags();
- MIBLO.addOperand(MachineOperand::CreateBA(BA, TF | AVRII::MO_LO));
- MIBHI.addOperand(MachineOperand::CreateBA(BA, TF | AVRII::MO_HI));
+ MIBLO.add(MachineOperand::CreateBA(BA, TF | AVRII::MO_LO));
+ MIBHI.add(MachineOperand::CreateBA(BA, TF | AVRII::MO_HI));
break;
}
case MachineOperand::MO_Immediate: {
@@ -785,9 +785,8 @@ bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
auto Op1 = MI.getOperand(0);
auto Op2 = MI.getOperand(1);
- MachineInstr &NewInst = *buildMI(MBB, MBBI, Opcode)
- .addOperand(Op1).addOperand(Op2)
- .getInstr();
+ MachineInstr &NewInst =
+ *buildMI(MBB, MBBI, Opcode).add(Op1).add(Op2).getInstr();
f(NewInst);
});
}
@@ -810,15 +809,13 @@ bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
// Create the load
- buildMI(MBB, MBBI, LoadOpcode).addOperand(Op1).addOperand(Op2);
+ buildMI(MBB, MBBI, LoadOpcode).add(Op1).add(Op2);
// Create the arithmetic op
- buildMI(MBB, MBBI, ArithOpcode)
- .addOperand(Op1).addOperand(Op1)
- .addOperand(Op2);
+ buildMI(MBB, MBBI, ArithOpcode).add(Op1).add(Op1).add(Op2);
// Create the store
- buildMI(MBB, MBBI, StoreOpcode).addOperand(Op2).addOperand(Op1);
+ buildMI(MBB, MBBI, StoreOpcode).add(Op2).add(Op1);
});
}
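Similarly, the AVR hunks above are a rename of MachineInstrBuilder::addOperand() to add(), which copies an existing MachineOperand, register flags included, onto the new instruction. A short sketch under that assumption; the opcode choice is only an example:

    // Reuse the pseudo's operands directly instead of re-spelling them with
    // addReg(); add(MO) keeps flags such as kill/dead intact.
    auto Dst = MI.getOperand(0);
    auto Src = MI.getOperand(1);
    buildMI(MBB, MBBI, AVR::LDRdPtr).add(Dst).add(Src);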
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 07fc3f6890b8..0b95d3819399 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -48,6 +48,8 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
@@ -311,7 +313,7 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
"Invalid opcode for Div/Rem lowering");
- bool isSigned = (Opcode == ISD::SDIVREM);
+ bool IsSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
@@ -320,16 +322,16 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("Unexpected request for libcall!");
case MVT::i8:
- LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
+ LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
break;
case MVT::i16:
- LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
+ LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
break;
case MVT::i32:
- LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
+ LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
break;
case MVT::i64:
- LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
+ LC = IsSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
break;
}
@@ -340,8 +342,8 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
for (SDValue const &Value : Op->op_values()) {
Entry.Node = Value;
Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = IsSigned;
+ Entry.IsZExt = !IsSigned;
Args.push_back(Entry);
}
@@ -354,10 +356,10 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(InChain)
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setInRegister()
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned);
+ .setSExtResult(IsSigned)
+ .setZExtResult(!IsSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
@@ -932,6 +934,12 @@ static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI,
bool UsesStack = false;
for (unsigned i = 0, pos = 0, e = Args.size(); i != e; ++i) {
unsigned Size = Args[i];
+
+ // If we have a zero-sized argument, don't attempt to lower it.
+ // AVR-GCC does not support zero-sized arguments and so we need not
+ // worry about ABI compatibility.
+ if (Size == 0) continue;
+
MVT LocVT = (IsCall) ? (*Outs)[pos].VT : (*Ins)[pos].VT;
// If we have plenty of regs to pass the whole argument do it.
@@ -1373,7 +1381,7 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Don't emit the ret/reti instruction when the naked attribute is present in
// the function being compiled.
if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked)) {
+ AttributeList::FunctionIndex, Attribute::Naked)) {
return Chain;
}
@@ -1975,4 +1983,3 @@ unsigned AVRTargetLowering::getRegisterByName(const char *RegName,
}
} // end of namespace llvm
-
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index bc66379ab708..693d80a1c06f 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -694,7 +694,7 @@ Defs = [SREG] in
}
//===----------------------------------------------------------------------===//
-// One's/Two's Compliment
+// One's/Two's Complement
//===----------------------------------------------------------------------===//
let Constraints = "$src = $rd",
Defs = [SREG] in
@@ -1718,7 +1718,7 @@ Defs = [SREG] in
(implicit SREG)]>;
// CBR Rd, K
- // Alias for `ANDI Rd, COM(K)` where COM(K) is the compliment of K.
+ // Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
// FIXME: This uses the 'complement' encoder. We need it to also use the
// imm_ldi8 encoder. This will cause no fixups to be created on this instruction.
def CBRRdK : FRdK<0b0111,
diff --git a/lib/Target/AVR/AVRInstrumentFunctions.cpp b/lib/Target/AVR/AVRInstrumentFunctions.cpp
index 5553dc2da31b..e7fca74e1701 100644
--- a/lib/Target/AVR/AVRInstrumentFunctions.cpp
+++ b/lib/Target/AVR/AVRInstrumentFunctions.cpp
@@ -96,7 +96,7 @@ static void BuildSignatureCall(StringRef SymName, BasicBlock &BB, Function &F) {
Value *FunctionName = CreateStringPtr(BB, F.getName());
Value *Args[] = {FunctionName,
- ConstantInt::get(I16, F.getArgumentList().size())};
+ ConstantInt::get(I16, F.arg_size())};
CallInst::Create(Fn, Args, "", &BB);
}
diff --git a/lib/Target/AVR/AVRMCInstLower.cpp b/lib/Target/AVR/AVRMCInstLower.cpp
index 342fe558813a..475dda420e89 100644
--- a/lib/Target/AVR/AVRMCInstLower.cpp
+++ b/lib/Target/AVR/AVRMCInstLower.cpp
@@ -56,7 +56,7 @@ void AVRMCInstLower::lowerInstruction(const MachineInstr &MI, MCInst &OutMI) con
switch (MO.getType()) {
default:
- MI.dump();
+ MI.print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 081d8b5740ef..5c3b45ac2328 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -335,7 +335,7 @@ MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
if (Value == 0)
return; // Doesn't change encoding.
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 7ff4b8f350f6..f2be2494684a 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -41,7 +41,7 @@ public:
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 481de320b22f..713754821005 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -1,5 +1,7 @@
#include "AVRELFStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index cca3bcc4968a..9f2ee8cf8035 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -23,6 +23,7 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) {
CommentString = ";";
PrivateGlobalPrefix = ".L";
UsesELFSectionDirectiveForBSS = true;
+ UseIntegratedAssembler = true;
}
} // end of namespace llvm
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index e6dc8868c705..c3d43ebb407e 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "mccodeemitter"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 5fa425c296a5..4cee8d904c9d 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -63,7 +63,7 @@ private:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// Takes the compliment of a number (~0 - val).
+ /// Takes the complement of a number (~0 - val).
unsigned encodeComplement(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 12091449cc11..279cdb1a89b4 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -71,7 +71,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// Addresses of the form Addr+const or Addr|const
if (CurDAG->isBaseWithConstantOffset(Addr)) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<32>(CN->getSExtValue())) {
+ if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN =
@@ -99,7 +99,7 @@ bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset)
// Addresses of the form Addr+const or Addr|const
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<32>(CN->getSExtValue())) {
+ if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN =
@@ -138,7 +138,7 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
else
errs() << "Error: ";
errs() << "Unsupport signed division for DAG: ";
- Node->dump(CurDAG);
+ Node->print(errs(), CurDAG);
errs() << "Please convert to unsigned div/mod.\n";
break;
}
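The isInt<32> to isInt<16> tightening above matches the eBPF load/store encoding, whose offset is a signed 16-bit field, so only constants that fit 16 bits may be folded into the [reg+off] addressing mode. A tiny sketch of the check, with a made-up offset:

    // A constant such as 0x12345 does not fit the signed 16-bit 'off' field,
    // so isInt<16>() rejects it and the add stays a separate instruction.
    int64_t Off = 0x12345;
    bool Foldable = isInt<16>(Off);   // false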
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index cca3492a1992..b9b3dff95c0a 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
#define DEBUG_TYPE "bpf-lower"
-static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
+static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
DiagnosticInfoUnsupported(*MF.getFunction(), Msg, DL.getDebugLoc()));
@@ -306,11 +306,23 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ auto GV = G->getGlobal();
+ fail(CLI.DL, DAG,
+ "A call to global function '" + StringRef(GV->getName())
+ + "' is not supported. "
+ + (GV->isDeclaration() ?
+ "Only calls to predefined BPF helpers are allowed." :
+           "Please use __attribute__((always_inline)) to make sure"
+ " this function is inlined."));
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
G->getOffset(), 0);
- else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
+ fail(CLI.DL, DAG, Twine("A call to built-in function '"
+ + StringRef(E->getSymbol())
+ + "' is not supported."));
+ }
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index a7910dea98de..93ee24371c4d 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -470,6 +470,7 @@ def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),
// Calls
def : Pat<(BPFcall tglobaladdr:$dst), (JAL tglobaladdr:$dst)>;
+def : Pat<(BPFcall texternalsym:$dst), (JAL texternalsym:$dst)>;
def : Pat<(BPFcall imm:$dst), (JAL imm:$dst)>;
// Loads
diff --git a/lib/Target/BPF/BPFMCInstLower.cpp b/lib/Target/BPF/BPFMCInstLower.cpp
index f64defecf3cc..c8528e867310 100644
--- a/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/lib/Target/BPF/BPFMCInstLower.cpp
@@ -29,6 +29,11 @@ BPFMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
return Printer.getSymbol(MO.getGlobal());
}
+MCSymbol *
+BPFMCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
@@ -49,7 +54,7 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
@@ -66,6 +71,9 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_RegisterMask:
continue;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
case MachineOperand::MO_GlobalAddress:
MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
break;
diff --git a/lib/Target/BPF/BPFMCInstLower.h b/lib/Target/BPF/BPFMCInstLower.h
index 054e89407db2..eac811f4cf88 100644
--- a/lib/Target/BPF/BPFMCInstLower.h
+++ b/lib/Target/BPF/BPFMCInstLower.h
@@ -37,6 +37,7 @@ public:
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
};
}
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 71846e3e92c9..7925bee9c587 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "BPFGenRegisterInfo.inc"
@@ -41,6 +42,18 @@ BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+static void WarnSize(int Offset, MachineFunction &MF, DebugLoc& DL)
+{
+ if (Offset <= -512) {
+ auto F = MF.getFunction();
+ DiagnosticInfoUnsupported DiagStackSize(*F,
+ "Looks like the BPF stack limit of 512 bytes is exceeded. "
+      "Please move large on-stack variables into a BPF per-cpu array map.\n",
+ DL);
+ F->getContext().diagnose(DiagStackSize);
+ }
+}
+
void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -48,9 +61,18 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned i = 0;
MachineInstr &MI = *II;
- MachineFunction &MF = *MI.getParent()->getParent();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc DL = MI.getDebugLoc();
+ if (!DL)
+    // Try harder to find some debug location.
+ for (auto &I : MBB)
+ if (I.getDebugLoc()) {
+ DL = I.getDebugLoc();
+ break;
+ }
+
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
@@ -59,11 +81,11 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned FrameReg = getFrameRegister(MF);
int FrameIndex = MI.getOperand(i).getIndex();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineBasicBlock &MBB = *MI.getParent();
if (MI.getOpcode() == BPF::MOV_rr) {
int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex);
+ WarnSize(Offset, MF, DL);
MI.getOperand(i).ChangeToRegister(FrameReg, false);
unsigned reg = MI.getOperand(i - 1).getReg();
BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
@@ -78,6 +100,8 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!isInt<32>(Offset))
llvm_unreachable("bug in frame offset");
+ WarnSize(Offset, MF, DL);
+
if (MI.getOpcode() == BPF::FI_ri) {
// architecture does not really support FI_ri, replace it with
// MOV_rr <target_reg>, frame_reg
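The WarnSize() helper added above turns large negative frame offsets into a DiagnosticInfoUnsupported, since the kernel's BPF verifier limits a program to 512 bytes of stack. A rough sketch of the condition it keys on; the offset value is invented and the frame-layout details are simplified:

    // Frame objects sit at negative offsets from the frame register, so an
    // object placed at -520 is already past the 512-byte budget and the
    // backend raises the stack-limit diagnostic shown above.
    int Offset = -520;             // hypothetical spill/local offset
    bool TooLarge = (Offset <= -512);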
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index afc321ea2c34..1f355171ebd3 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -28,7 +28,7 @@ public:
~BPFAsmBackend() override = default;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -62,8 +62,8 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
}
void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ unsigned DataSize, uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const {
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
assert(Value == 0);
} else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 044db10fb3fa..1e6abfacb792 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -17,3 +17,9 @@ foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
endforeach()
+
+# Currently we do not allow libraries from lib to reference targets directly.
+# This property is used to enforce that convention. It is important because the
+# logic in llvm_map_components_to_libnames is order dependent on the target
+# libraries being created.
+set_property(GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED On)
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index becc086c81b0..4bbc36a86e5b 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -63,21 +63,25 @@ using namespace llvm;
static cl::opt<bool> EnableFutureRegs("mfuture-regs",
cl::desc("Enable future registers"));
-static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
-cl::desc("Warn for missing parenthesis around predicate registers"),
-cl::init(true));
-static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
-cl::desc("Error for missing parenthesis around predicate registers"),
-cl::init(false));
-static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
-cl::desc("Warn for mismatching a signed and unsigned value"),
-cl::init(true));
-static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
-cl::desc("Warn for register names that arent contigious"),
-cl::init(true));
-static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
-cl::desc("Error for register names that aren't contigious"),
-cl::init(false));
+static cl::opt<bool> WarnMissingParenthesis(
+ "mwarn-missing-parenthesis",
+ cl::desc("Warn for missing parenthesis around predicate registers"),
+ cl::init(true));
+static cl::opt<bool> ErrorMissingParenthesis(
+ "merror-missing-parenthesis",
+ cl::desc("Error for missing parenthesis around predicate registers"),
+ cl::init(false));
+static cl::opt<bool> WarnSignedMismatch(
+ "mwarn-sign-mismatch",
+ cl::desc("Warn for mismatching a signed and unsigned value"),
+ cl::init(true));
+static cl::opt<bool> WarnNoncontigiousRegister(
+ "mwarn-noncontigious-register",
+    cl::desc("Warn for register names that aren't contiguous"), cl::init(true));
+static cl::opt<bool> ErrorNoncontigiousRegister(
+ "merror-noncontigious-register",
+    cl::desc("Error for register names that aren't contiguous"),
+ cl::init(false));
namespace {
@@ -123,9 +127,11 @@ class HexagonAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
bool OutOfRange(SMLoc IDLoc, long long Val, long long Max);
int processInstruction(MCInst &Inst, OperandVector const &Operands,
SMLoc IDLoc);
@@ -168,11 +174,10 @@ public:
bool parseInstruction(OperandVector &Operands);
bool implicitExpressionLocation(OperandVector &Operands);
bool parseExpressionOrOperand(OperandVector &Operands);
- bool parseExpression(MCExpr const *& Expr);
+ bool parseExpression(MCExpr const *&Expr);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override
- {
+ SMLoc NameLoc, OperandVector &Operands) override {
llvm_unreachable("Unimplemented");
}
@@ -289,45 +294,63 @@ public:
return false;
}
- bool isf32Ext() const { return false; }
- bool iss32_0Imm() const { return CheckImmRange(32, 0, true, true, false); }
+ bool isa30_2Imm() const { return CheckImmRange(30, 2, true, true, true); }
+ bool isb30_2Imm() const { return CheckImmRange(30, 2, true, true, true); }
+ bool isb15_2Imm() const { return CheckImmRange(15, 2, true, true, false); }
+ bool isb13_2Imm() const { return CheckImmRange(13, 2, true, true, false); }
+
+ bool ism32_0Imm() const { return true; }
+
+ bool isf32Imm() const { return false; }
+ bool isf64Imm() const { return false; }
+ bool iss32_0Imm() const { return true; }
+ bool iss31_1Imm() const { return true; }
+ bool iss30_2Imm() const { return true; }
+ bool iss29_3Imm() const { return true; }
bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); }
+ bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); }
+ bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); }
+ bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); }
bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); }
bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); }
bool iss7_0Imm() const { return CheckImmRange(7, 0, true, false, false); }
bool iss6_0Imm() const { return CheckImmRange(6, 0, true, false, false); }
+ bool iss6_3Imm() const { return CheckImmRange(6, 3, true, false, false); }
bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); }
bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); }
bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); }
bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); }
- bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); }
- bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); }
bool iss3_0Imm() const { return CheckImmRange(3, 0, true, false, false); }
bool isu64_0Imm() const { return CheckImmRange(64, 0, false, true, true); }
- bool isu32_0Imm() const { return CheckImmRange(32, 0, false, true, false); }
+ bool isu32_0Imm() const { return true; }
+ bool isu31_1Imm() const { return true; }
+ bool isu30_2Imm() const { return true; }
+ bool isu29_3Imm() const { return true; }
bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); }
bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); }
bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); }
bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); }
bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); }
bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); }
- bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
- bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
- bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
bool isu10_0Imm() const { return CheckImmRange(10, 0, false, false, false); }
bool isu9_0Imm() const { return CheckImmRange(9, 0, false, false, false); }
bool isu8_0Imm() const { return CheckImmRange(8, 0, false, false, false); }
bool isu7_0Imm() const { return CheckImmRange(7, 0, false, false, false); }
bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
+ bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
+ bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
bool isu5_0Imm() const { return CheckImmRange(5, 0, false, false, false); }
+ bool isu5_2Imm() const { return CheckImmRange(5, 2, false, false, false); }
+ bool isu5_3Imm() const { return CheckImmRange(5, 3, false, false, false); }
bool isu4_0Imm() const { return CheckImmRange(4, 0, false, false, false); }
+ bool isu4_2Imm() const { return CheckImmRange(4, 2, false, false, false); }
bool isu3_0Imm() const { return CheckImmRange(3, 0, false, false, false); }
+ bool isu3_1Imm() const { return CheckImmRange(3, 1, false, false, false); }
bool isu2_0Imm() const { return CheckImmRange(2, 0, false, false, false); }
bool isu1_0Imm() const { return CheckImmRange(1, 0, false, false, false); }
- bool ism6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
- bool isn8_0Imm() const { return CheckImmRange(8, 0, false, false, false); }
bool isn1Const() const {
if (!isImm())
return false;
@@ -336,35 +359,18 @@ public:
return false;
return Value == -1;
}
-
- bool iss16_0Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
- bool iss12_0Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
- bool iss10_0Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
- bool iss9_0Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
- bool iss8_0Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
- bool iss7_0Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
- bool iss6_0Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
- bool iss11_0Ext() const {
+ bool iss11_0Imm() const {
return CheckImmRange(11 + 26, 0, true, true, true);
}
- bool iss11_1Ext() const {
+ bool iss11_1Imm() const {
return CheckImmRange(11 + 26, 1, true, true, true);
}
- bool iss11_2Ext() const {
+ bool iss11_2Imm() const {
return CheckImmRange(11 + 26, 2, true, true, true);
}
- bool iss11_3Ext() const {
+ bool iss11_3Imm() const {
return CheckImmRange(11 + 26, 3, true, true, true);
}
-
- bool isu7_0Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
- bool isu8_0Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
- bool isu9_0Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
- bool isu10_0Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
- bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
- bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); }
- bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); }
- bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); }
bool isu32_0MustExt() const { return isImm(); }
void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -392,188 +398,10 @@ public:
Inst.addOperand(MCOperand::createExpr(Expr));
}
- void addf32ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void adds32_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds23_2ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds8_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds8_0Imm64Operands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds6_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_1ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_2ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_3ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds3_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
-
- void addu64_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu32_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu26_6ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_1ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_2ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_3ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu11_3ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu10_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu9_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu8_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu7_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_1ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_2ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_3ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu5_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu4_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu3_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu2_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu1_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void addm6_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addn8_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void adds16_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds12_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds10_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds9_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds8_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds6_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_1ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_2ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_3ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
void addn1ConstOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu7_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu8_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu9_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu10_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_1ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu32_0MustExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE =
- dyn_cast<MCConstantExpr>(&HexagonMCInstrInfo::getExpr(*getImm()));
- Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
- }
-
- void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE =
- dyn_cast<MCConstantExpr>(&HexagonMCInstrInfo::getExpr(*getImm()));
- Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
- }
-
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -749,10 +577,6 @@ bool HexagonAsmParser::matchBundleOptions() {
HexagonMCInstrInfo::setInnerLoop(MCB);
else if (Option.compare_lower("endloop1") == 0)
HexagonMCInstrInfo::setOuterLoop(MCB);
- else if (Option.compare_lower("mem_noshuf") == 0)
- HexagonMCInstrInfo::setMemReorderDisabled(MCB);
- else if (Option.compare_lower("mem_shuf") == 0)
- HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
else
return true;
Lex();
@@ -770,8 +594,7 @@ void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
int64_t Value (I.getImm());
NewInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(
MCConstantExpr::create(Value, getContext()), getContext())));
- }
- else {
+ } else {
if (I.isExpr() && cast<HexagonMCExpr>(I.getExpr())->signMismatch() &&
WarnSignedMismatch)
Warning (MCI.getLoc(), "Signed/Unsigned mismatch");
@@ -1066,6 +889,9 @@ bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
// validate register against architecture
bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
+ if (HexagonMCRegisterClasses[Hexagon::V62RegsRegClassID].contains(MatchNum))
+ if (!getSTI().getFeatureBits()[Hexagon::ArchV62])
+ return false;
return true;
}
@@ -1171,11 +997,15 @@ bool HexagonAsmParser::parseOperand(OperandVector &Operands) {
bool HexagonAsmParser::isLabel(AsmToken &Token) {
MCAsmLexer &Lexer = getLexer();
AsmToken const &Second = Lexer.getTok();
- AsmToken Third = Lexer.peekTok();
+ AsmToken Third = Lexer.peekTok();
StringRef String = Token.getString();
if (Token.is(AsmToken::TokenKind::LCurly) ||
Token.is(AsmToken::TokenKind::RCurly))
return false;
+ // special case for parsing vwhist256:sat
+ if (String.lower() == "vwhist256" && Second.is(AsmToken::Colon) &&
+ Third.getString().lower() == "sat")
+ return false;
if (!Token.is(AsmToken::TokenKind::Identifier))
return true;
if (!matchRegister(String.lower()))
@@ -1756,8 +1586,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
TmpInst.setOpcode(Hexagon::L2_loadrdgp);
TmpInst.addOperand(MO_0);
- TmpInst.addOperand(
- MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext())));
+ TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(
+ MCSymbolRefExpr::create(Sym, getContext()), getContext())));
Inst = TmpInst;
}
}
@@ -2142,6 +1972,67 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case Hexagon::PS_loadrubabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrubgp);
+ break;
+ case Hexagon::PS_loadrbabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrbgp);
+ break;
+ case Hexagon::PS_loadruhabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadruhgp);
+ break;
+ case Hexagon::PS_loadrhabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrhgp);
+ break;
+ case Hexagon::PS_loadriabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrigp);
+ break;
+ case Hexagon::PS_loadrdabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrdgp);
+ break;
+ case Hexagon::PS_storerbabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerbgp);
+ break;
+ case Hexagon::PS_storerhabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerhgp);
+ break;
+ case Hexagon::PS_storerfabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerfgp);
+ break;
+ case Hexagon::PS_storeriabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerigp);
+ break;
+ case Hexagon::PS_storerdabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerdgp);
+ break;
+ case Hexagon::PS_storerbnewabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerbnewgp);
+ break;
+ case Hexagon::PS_storerhnewabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerhnewgp);
+ break;
+ case Hexagon::PS_storerinewabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerinewgp);
+ break;
+ case Hexagon::A2_zxtb: {
+ Inst.setOpcode(Hexagon::A2_andir);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, Context)));
+ break;
+ }
} // switch
return Match_Success;
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 963fb99ce09b..61d3630ac095 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -317,6 +317,15 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
return true;
}
+BT::RegisterCell &BT::RegisterCell::regify(unsigned R) {
+ for (unsigned i = 0, n = width(); i < n; ++i) {
+ const BitValue &V = Bits[i];
+ if (V.Type == BitValue::Ref && V.RefI.Reg == 0)
+ Bits[i].RefI = BitRef(R, i);
+ }
+ return *this;
+}
+
uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// The general problem is with finding a register class that corresponds
// to a given reference reg:sub. There can be several such classes, and
@@ -378,12 +387,7 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
return;
assert(RR.Sub == 0 && "Unexpected sub-register in definition");
// Eliminate all ref-to-reg-0 bit values: replace them with "self".
- for (unsigned i = 0, n = RC.width(); i < n; ++i) {
- const BitValue &V = RC[i];
- if (V.Type == BitValue::Ref && V.RefI.Reg == 0)
- RC[i].RefI = BitRef(RR.Reg, i);
- }
- M[RR.Reg] = RC;
+ M[RR.Reg] = RC.regify(RR.Reg);
}
// Check if the cell represents a compile-time integer value.
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index 48c5f2266acf..a547b34e852f 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -283,6 +283,9 @@ struct BitTracker::RegisterCell {
return !operator==(RC);
}
+ // Replace the ref-to-reg-0 bit values with the given register.
+ RegisterCell &regify(unsigned R);
+
// Generate a "ref" cell for the corresponding register. In the resulting
// cell each bit will be described as being the same as the corresponding
// bit in register Reg (i.e. the cell is "defined" by register Reg).
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 2c527d598628..2f3dd3326fcc 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
HexagonISelLowering.cpp
+ HexagonLoopIdiomRecognition.cpp
HexagonMachineFunctionInfo.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
@@ -58,10 +59,10 @@ add_llvm_target(HexagonCodeGen
RDFDeadCode.cpp
RDFGraph.cpp
RDFLiveness.cpp
- )
+ RDFRegisters.cpp
+)
add_subdirectory(AsmParser)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
add_subdirectory(Disassembler)
-
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index c05fbc1d7756..ae15ed0e9240 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -57,11 +57,38 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
-
- void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const;
void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const;
};
+namespace {
+ uint32_t fullValue(MCInstrInfo const &MCII, MCInst &MCB, MCInst &MI,
+ int64_t Value) {
+ MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB));
+ if (!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
+ return Value;
+ unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f;
+ int64_t Bits;
+ bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits);
+ assert(Success); (void)Success;
+ uint32_t Upper26 = static_cast<uint32_t>(Bits);
+ uint32_t Operand = Upper26 | Lower6;
+ return Operand;
+ }
+ HexagonDisassembler const &disassembler(void const *Decoder) {
+ return *static_cast<HexagonDisassembler const *>(Decoder);
+ }
+ template <size_t T>
+ void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue =
+ fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI,
+ SignExtend64<T>(tmp));
+ int64_t Extended = SignExtend64<32>(FullValue);
+ HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
+ }
+}
} // end anonymous namespace
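A worked example of fullValue() above, with invented numbers; it assumes the extender expression already evaluates to the upper bits of the immediate with the low six bits clear, which is what the plain OR relies on:

    // Alignment == 0 for a 32-bit immediate:
    //   extender payload (Bits) = 0x00012340   // low 6 bits clear
    //   field in this insn      : Value = 0x15
    //   Lower6  = (0x15 >> 0) & 0x3f = 0x15
    //   Operand = 0x00012340 | 0x15  = 0x00012355
    // With no extender in the bundle, fullValue() simply returns Value;
    // signedDecoder<N>() then sign-extends the combined value to 32 bits.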
// Forward declare these because the auto-generated code will reference them.
@@ -70,6 +97,10 @@ public:
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -79,6 +110,9 @@ static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus
+DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -98,31 +132,10 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn);
-static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
- void const *Decoder);
-
-static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
- raw_ostream &os);
-
-static unsigned getRegFromSubinstEncoding(unsigned encoded_reg);
-
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t Address, const void *Decoder);
-static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
+static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder);
static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
@@ -135,13 +148,12 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
+#include "HexagonDepDecoders.h"
#include "HexagonGenDisassemblerTables.inc"
static MCDisassembler *createHexagonDisassembler(const Target &T,
@@ -175,20 +187,31 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Size += HEXAGON_INSTR_SIZE;
Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
}
- if(Result == MCDisassembler::Fail)
+ if (Result == MCDisassembler::Fail)
return Result;
- HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo());
- if(!Checker.check())
+ if (Size > HEXAGON_MAX_PACKET_SIZE)
+ return MCDisassembler::Fail;
+ HexagonMCChecker Checker(*MCII, STI, MI, MI, *getContext().getRegisterInfo());
+ if (!Checker.check())
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static HexagonDisassembler const &disassembler(void const *Decoder) {
- return *static_cast<HexagonDisassembler const *>(Decoder);
+namespace {
+void adjustDuplex(MCInst &MI, MCContext &Context) {
+ switch (MI.getOpcode()) {
+ case Hexagon::SA1_setin1:
+ MI.insert(MI.begin() + 1,
+ MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
+ break;
+ case Hexagon::SA1_dec:
+ MI.insert(MI.begin() + 2,
+ MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
+ break;
+ default:
+ break;
+ }
}
-
-static MCContext &contextFromDecoder(void const *Decoder) {
- return disassembler(Decoder).getContext();
}
DecodeStatus HexagonDisassembler::getSingleInstruction(
@@ -196,8 +219,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
raw_ostream &os, raw_ostream &cs, bool &Complete) const {
assert(Bytes.size() >= HEXAGON_INSTR_SIZE);
- uint32_t Instruction =
- (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
+ uint32_t Instruction = support::endian::read32le(Bytes.data());
auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB);
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
@@ -210,103 +232,92 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
return DecodeStatus::Fail;
}
- DecodeStatus Result = DecodeStatus::Success;
+ MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB));
+
+ DecodeStatus Result = DecodeStatus::Fail;
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
HexagonII::INST_PARSE_DUPLEX) {
- // Determine the instruction class of each instruction in the duplex.
- unsigned duplexIClass, IClassLow, IClassHigh;
-
+ unsigned duplexIClass;
+ uint8_t const *DecodeLow, *DecodeHigh;
duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1);
switch (duplexIClass) {
default:
return MCDisassembler::Fail;
case 0:
- IClassLow = HexagonII::HSIG_L1;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_L132;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 1:
- IClassLow = HexagonII::HSIG_L2;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_L232;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 2:
- IClassLow = HexagonII::HSIG_L2;
- IClassHigh = HexagonII::HSIG_L2;
+ DecodeLow = DecoderTableSUBINSN_L232;
+ DecodeHigh = DecoderTableSUBINSN_L232;
break;
case 3:
- IClassLow = HexagonII::HSIG_A;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_A32;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 4:
- IClassLow = HexagonII::HSIG_L1;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_L132;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 5:
- IClassLow = HexagonII::HSIG_L2;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_L232;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 6:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 7:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 8:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 9:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_L2;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_L232;
break;
case 10:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_S1;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_S132;
break;
case 11:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_S1;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_S132;
break;
case 12:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 13:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_L2;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_L232;
break;
case 14:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_S2;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_S232;
break;
}
-
- // Set the MCInst to be a duplex instruction. Which one doesn't matter.
- MI.setOpcode(Hexagon::DuplexIClass0);
-
- // Decode each instruction in the duplex.
- // Create an MCInst for each instruction.
- unsigned instLow = Instruction & 0x1fff;
- unsigned instHigh = (Instruction >> 16) & 0x1fff;
- unsigned opLow;
- if (GetSubinstOpcode(IClassLow, instLow, opLow, os) !=
- MCDisassembler::Success)
- return MCDisassembler::Fail;
- unsigned opHigh;
- if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) !=
- MCDisassembler::Success)
- return MCDisassembler::Fail;
+ MI.setOpcode(Hexagon::DuplexIClass0 + duplexIClass);
MCInst *MILow = new (getContext()) MCInst;
- MILow->setOpcode(opLow);
MCInst *MIHigh = new (getContext()) MCInst;
- MIHigh->setOpcode(opHigh);
- addSubinstOperands(MILow, opLow, instLow);
- addSubinstOperands(MIHigh, opHigh, instHigh);
- // see ConvertToSubInst() in
- // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
-
- // Add the duplex instruction MCInsts as operands to the passed in MCInst.
+ Result = decodeInstruction(DecodeLow, *MILow, Instruction & 0x1fff, Address,
+ this, STI);
+ if (Result != DecodeStatus::Success)
+ return DecodeStatus::Fail;
+ adjustDuplex(*MILow, getContext());
+ Result = decodeInstruction(
+ DecodeHigh, *MIHigh, (Instruction >> 16) & 0x1fff, Address, this, STI);
+ if (Result != DecodeStatus::Success)
+ return DecodeStatus::Fail;
+ adjustDuplex(*MIHigh, getContext());
MCOperand OPLow = MCOperand::createInst(MILow);
MCOperand OPHigh = MCOperand::createInst(MIHigh);
MI.addOperand(OPLow);
@@ -316,34 +327,23 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
HexagonII::INST_PARSE_PACKET_END)
Complete = true;
- // Calling the auto-generated decoder function.
- Result =
- decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI);
- // If a, "standard" insn isn't found check special cases.
- if (MCDisassembler::Success != Result ||
- MI.getOpcode() == Hexagon::A4_ext) {
- Result = decodeImmext(MI, Instruction, this);
- if (MCDisassembler::Success != Result) {
- Result = decodeSpecial(MI, Instruction);
- }
- } else {
- // If the instruction is a compound instruction, register values will
- // follow the duplex model, so the register values in the MCInst are
- // incorrect. If the instruction is a compound, loop through the
- // operands and change registers appropriately.
- if (HexagonMCInstrInfo::getType(*MCII, MI) == HexagonII::TypeCOMPOUND) {
- for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) {
- if (i->isReg()) {
- unsigned reg = i->getReg() - Hexagon::R0;
- i->setReg(getRegFromSubinstEncoding(reg));
- }
- }
- }
- }
+ if (Extender != nullptr)
+ Result = decodeInstruction(DecoderTableMustExtend32, MI, Instruction,
+ Address, this, STI);
+
+ if (Result != MCDisassembler::Success)
+ Result = decodeInstruction(DecoderTable32, MI, Instruction, Address, this,
+ STI);
+
+ if (Result != MCDisassembler::Success &&
+ STI.getFeatureBits()[Hexagon::ExtensionHVX])
+ Result = decodeInstruction(DecoderTableEXT_mmvec32, MI, Instruction,
+ Address, this, STI);
+
}
- switch(MI.getOpcode()) {
+ switch (MI.getOpcode()) {
case Hexagon::J4_cmpeqn1_f_jumpnv_nt:
case Hexagon::J4_cmpeqn1_f_jumpnv_t:
case Hexagon::J4_cmpeqn1_fp0_jump_nt:
@@ -368,7 +368,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
case Hexagon::J4_cmpgtn1_tp0_jump_t:
case Hexagon::J4_cmpgtn1_tp1_jump_nt:
case Hexagon::J4_cmpgtn1_tp1_jump_t:
- MI.insert(MI.begin() + 1, MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
+ MI.insert(MI.begin() + 1,
+ MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
break;
default:
break;
@@ -423,13 +424,10 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
return MCDisassembler::Fail;
}
- adjustExtendedInstructions(MI, MCB);
- MCInst const *Extender =
- HexagonMCInstrInfo::extenderForIndex(MCB,
- HexagonMCInstrInfo::bundleSize(MCB));
- if(Extender != nullptr) {
- MCInst const & Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ?
- *MI.getOperand(1).getInst() : MI;
+ if (Extender != nullptr) {
+ MCInst const &Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI)
+ ? *MI.getOperand(1).getInst()
+ : MI;
if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) &&
!HexagonMCInstrInfo::isExtended(*MCII, Inst))
return MCDisassembler::Fail;
@@ -437,68 +435,6 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
return Result;
}
-void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI,
- MCInst const &MCB) const {
- if (!HexagonMCInstrInfo::hasExtenderForIndex(
- MCB, HexagonMCInstrInfo::bundleSize(MCB))) {
- unsigned opcode;
- // This code is used by the disassembler to disambiguate between GP
- // relative and absolute addressing instructions since they both have
-    // the same encoding bits. However, an absolute addressing instruction
-    // must follow an immediate extender. The disassembler always selects
-    // absolute addressing instructions first and uses this code to change
-    // them into GP relative instructions in the absence of the corresponding
-    // immediate extender.
- switch (MCI.getOpcode()) {
- case Hexagon::PS_storerbabs:
- opcode = Hexagon::S2_storerbgp;
- break;
- case Hexagon::PS_storerhabs:
- opcode = Hexagon::S2_storerhgp;
- break;
- case Hexagon::PS_storerfabs:
- opcode = Hexagon::S2_storerfgp;
- break;
- case Hexagon::PS_storeriabs:
- opcode = Hexagon::S2_storerigp;
- break;
- case Hexagon::PS_storerbnewabs:
- opcode = Hexagon::S2_storerbnewgp;
- break;
- case Hexagon::PS_storerhnewabs:
- opcode = Hexagon::S2_storerhnewgp;
- break;
- case Hexagon::PS_storerinewabs:
- opcode = Hexagon::S2_storerinewgp;
- break;
- case Hexagon::PS_storerdabs:
- opcode = Hexagon::S2_storerdgp;
- break;
- case Hexagon::PS_loadrbabs:
- opcode = Hexagon::L2_loadrbgp;
- break;
- case Hexagon::PS_loadrubabs:
- opcode = Hexagon::L2_loadrubgp;
- break;
- case Hexagon::PS_loadrhabs:
- opcode = Hexagon::L2_loadrhgp;
- break;
- case Hexagon::PS_loadruhabs:
- opcode = Hexagon::L2_loadruhgp;
- break;
- case Hexagon::PS_loadriabs:
- opcode = Hexagon::L2_loadrigp;
- break;
- case Hexagon::PS_loadrdabs:
- opcode = Hexagon::L2_loadrdgp;
- break;
- default:
- opcode = MCI.getOpcode();
- }
- MCI.setOpcode(opcode);
- }
-}
-
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
ArrayRef<MCPhysReg> Table) {
if (RegNo < Table.size()) {
@@ -530,6 +466,20 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable);
}
+static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ static const MCPhysReg GeneralSubRegDecoderTable[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3,
+ Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7,
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ };
+
+ return DecodeRegisterClass(Inst, RegNo, GeneralSubRegDecoderTable);
+}
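
The GeneralSubRegDecoderTable above fixes the duplex sub-register numbering: 4-bit codes 0-7 name R0-R7 and codes 8-15 name R16-R23. A standalone sketch of the same mapping as arithmetic (register numbers are plain integers here, not Hexagon::Rn enumerators):

#include <cassert>

// Map a 4-bit duplex sub-register code to its general-register number.
unsigned subRegNumber(unsigned code) {
  assert(code < 16 && "duplex sub-registers use a 4-bit encoding");
  return code < 8 ? code : code + 8;   // 8..15 -> R16..R23
}

int main() {
  assert(subRegNumber(5) == 5);    // R5
  assert(subRegNumber(12) == 20);  // R20
  return 0;
}
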
+
static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
@@ -557,6 +507,15 @@ static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable);
}
+static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass(
+ MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) {
+ static const MCPhysReg GeneralDoubleLow8RegDecoderTable[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
+ Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11};
+
+ return DecodeRegisterClass(Inst, RegNo, GeneralDoubleLow8RegDecoderTable);
+}
+
static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
@@ -590,17 +549,23 @@ static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
+ using namespace Hexagon;
static const MCPhysReg CtrlRegDecoderTable[] = {
- Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
- Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7,
- Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
- Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC
+ /* 0 */ SA0, LC0, SA1, LC1,
+ /* 4 */ P3_0, C5, C6, C7,
+ /* 8 */ USR, PC, UGP, GP,
+ /* 12 */ CS0, CS1, UPCYCLELO, UPCYCLEHI,
+ /* 16 */ FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI,
+ /* 20 */ 0, 0, 0, 0,
+ /* 24 */ 0, 0, 0, 0,
+ /* 28 */ 0, 0, UTIMERLO, UTIMERHI
};
if (RegNo >= array_lengthof(CtrlRegDecoderTable))
return MCDisassembler::Fail;
- if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
+ if (CtrlRegDecoderTable[RegNo] == NoRegister)
return MCDisassembler::Fail;
unsigned Register = CtrlRegDecoderTable[RegNo];
@@ -611,20 +576,23 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
+ using namespace Hexagon;
static const MCPhysReg CtrlReg64DecoderTable[] = {
- Hexagon::C1_0, Hexagon::NoRegister,
- Hexagon::C3_2, Hexagon::NoRegister,
- Hexagon::C7_6, Hexagon::NoRegister,
- Hexagon::C9_8, Hexagon::NoRegister,
- Hexagon::C11_10, Hexagon::NoRegister,
- Hexagon::CS, Hexagon::NoRegister,
- Hexagon::UPC, Hexagon::NoRegister
+ /* 0 */ C1_0, 0, C3_2, 0,
+ /* 4 */ C5_4, 0, C7_6, 0,
+ /* 8 */ C9_8, 0, C11_10, 0,
+ /* 12 */ CS, 0, UPCYCLE, 0,
+ /* 16 */ C17_16, 0, PKTCOUNT, 0,
+ /* 20 */ 0, 0, 0, 0,
+ /* 24 */ 0, 0, 0, 0,
+ /* 28 */ 0, 0, UTIMER, 0
};
if (RegNo >= array_lengthof(CtrlReg64DecoderTable))
return MCDisassembler::Fail;
- if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
+ if (CtrlReg64DecoderTable[RegNo] == NoRegister)
return MCDisassembler::Fail;
unsigned Register = CtrlReg64DecoderTable[RegNo];
@@ -650,132 +618,23 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static uint32_t fullValue(MCInstrInfo const &MCII, MCInst &MCB, MCInst &MI,
- int64_t Value) {
- MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
- MCB, HexagonMCInstrInfo::bundleSize(MCB));
- if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
- return Value;
- unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
- uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f;
- int64_t Bits;
- bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits);
- assert(Success);(void)Success;
- uint32_t Upper26 = static_cast<uint32_t>(Bits);
- uint32_t Operand = Upper26 | Lower6;
- return Operand;
-}
-
-template <size_t T>
-static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
- HexagonDisassembler const &Disassembler = disassembler(Decoder);
- int64_t FullValue = fullValue(*Disassembler.MCII,
- **Disassembler.CurrentBundle,
- MI, SignExtend64<T>(tmp));
- int64_t Extended = SignExtend64<32>(FullValue);
- HexagonMCInstrInfo::addConstant(MI, Extended,
- Disassembler.getContext());
-}
-
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/,
const void *Decoder) {
HexagonDisassembler const &Disassembler = disassembler(Decoder);
- int64_t FullValue = fullValue(*Disassembler.MCII,
- **Disassembler.CurrentBundle,
- MI, tmp);
+ int64_t FullValue =
+ fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI, tmp);
assert(FullValue >= 0 && "Negative in unsigned decoder");
HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext());
return MCDisassembler::Success;
}
-static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<16>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<12>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<11>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<13>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<14>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<10>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
- const void *Decoder) {
- signedDecoder<8>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<4>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<5>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<7>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<10>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<19>(MI, tmp, Decoder);
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI);
+ tmp = SignExtend64(tmp, Bits);
+ signedDecoder<32>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
@@ -787,838 +646,13 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
// r13_2 is not extendable, so if there are no extent bits, it's r13_2
if (Bits == 0)
Bits = 15;
- uint32_t FullValue = fullValue(*Disassembler.MCII,
- **Disassembler.CurrentBundle,
- MI, SignExtend64(tmp, Bits));
+ uint32_t FullValue =
+ fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI,
+ SignExtend64(tmp, Bits));
int64_t Extended = SignExtend64<32>(FullValue) + Address;
- if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true,
- 0, 4))
+ if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4))
HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
return MCDisassembler::Success;
}
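
brtargetDecoder above turns the decoded field into a PC-relative target: sign-extend it to its extent width (defaulting to 15 bits for the non-extendable r13_2 form), merge in any constant extender, and add the packet address. A standalone sketch of just the sign-extend-and-add step, with the extender merge and symbolic-operand handling omitted:

#include <cassert>
#include <cstdint>

// Two's-complement sign extension of the low `bits` bits of `value`.
int64_t signExtend(uint64_t value, unsigned bits) {
  uint64_t m = 1ull << (bits - 1);
  return (int64_t)((value ^ m) - m);
}

uint64_t branchTarget(uint64_t address, uint32_t field, unsigned extentBits) {
  if (extentBits == 0)
    extentBits = 15;                 // r13_2: no extent bits recorded
  return address + (int64_t)(int32_t)signExtend(field, extentBits);
}

int main() {
  assert(branchTarget(0x1000, 0x7ffc, 15) == 0x1000 - 4);  // offset of -4
  return 0;
}
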
-// Addressing mode dependent load store opcode map.
-// - If an insn is preceded by an extender the address is absolute.
-// - memw(##symbol) = r0
-// - If an insn is not preceded by an extender the address is GP relative.
-// - memw(gp + #symbol) = r0
-// Please note that the instructions must be ordered in the descending order
-// of their opcode.
-// HexagonII::INST_ICLASS_ST
-static const unsigned int StoreConditionalOpcodeData[][2] = {
- {S4_pstorerdfnew_abs, 0xafc02084},
- {S4_pstorerdtnew_abs, 0xafc02080},
- {S4_pstorerdf_abs, 0xafc00084},
- {S4_pstorerdt_abs, 0xafc00080},
- {S4_pstorerinewfnew_abs, 0xafa03084},
- {S4_pstorerinewtnew_abs, 0xafa03080},
- {S4_pstorerhnewfnew_abs, 0xafa02884},
- {S4_pstorerhnewtnew_abs, 0xafa02880},
- {S4_pstorerbnewfnew_abs, 0xafa02084},
- {S4_pstorerbnewtnew_abs, 0xafa02080},
- {S4_pstorerinewf_abs, 0xafa01084},
- {S4_pstorerinewt_abs, 0xafa01080},
- {S4_pstorerhnewf_abs, 0xafa00884},
- {S4_pstorerhnewt_abs, 0xafa00880},
- {S4_pstorerbnewf_abs, 0xafa00084},
- {S4_pstorerbnewt_abs, 0xafa00080},
- {S4_pstorerifnew_abs, 0xaf802084},
- {S4_pstoreritnew_abs, 0xaf802080},
- {S4_pstorerif_abs, 0xaf800084},
- {S4_pstorerit_abs, 0xaf800080},
- {S4_pstorerhfnew_abs, 0xaf402084},
- {S4_pstorerhtnew_abs, 0xaf402080},
- {S4_pstorerhf_abs, 0xaf400084},
- {S4_pstorerht_abs, 0xaf400080},
- {S4_pstorerbfnew_abs, 0xaf002084},
- {S4_pstorerbtnew_abs, 0xaf002080},
- {S4_pstorerbf_abs, 0xaf000084},
- {S4_pstorerbt_abs, 0xaf000080}};
-// HexagonII::INST_ICLASS_LD
-
-// HexagonII::INST_ICLASS_LD_ST_2
-static unsigned int LoadStoreOpcodeData[][2] = {{PS_loadrdabs, 0x49c00000},
- {PS_loadriabs, 0x49800000},
- {PS_loadruhabs, 0x49600000},
- {PS_loadrhabs, 0x49400000},
- {PS_loadrubabs, 0x49200000},
- {PS_loadrbabs, 0x49000000},
- {PS_storerdabs, 0x48c00000},
- {PS_storerinewabs, 0x48a01000},
- {PS_storerhnewabs, 0x48a00800},
- {PS_storerbnewabs, 0x48a00000},
- {PS_storeriabs, 0x48800000},
- {PS_storerfabs, 0x48600000},
- {PS_storerhabs, 0x48400000},
- {PS_storerbabs, 0x48000000}};
-static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData);
-static const size_t NumLS = array_lengthof(LoadStoreOpcodeData);
-
-static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
- unsigned MachineOpcode = 0;
- unsigned LLVMOpcode = 0;
-
- if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) {
- for (size_t i = 0; i < NumCondS; ++i) {
- if ((insn & StoreConditionalOpcodeData[i][1]) ==
- StoreConditionalOpcodeData[i][1]) {
- MachineOpcode = StoreConditionalOpcodeData[i][1];
- LLVMOpcode = StoreConditionalOpcodeData[i][0];
- break;
- }
- }
- }
- if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) {
- for (size_t i = 0; i < NumLS; ++i) {
- if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) {
- MachineOpcode = LoadStoreOpcodeData[i][1];
- LLVMOpcode = LoadStoreOpcodeData[i][0];
- break;
- }
- }
- }
-
- if (MachineOpcode) {
- unsigned Value = 0;
- unsigned shift = 0;
- MI.setOpcode(LLVMOpcode);
- // Remove the parse bits from the insn.
- insn &= ~HexagonII::INST_PARSE_MASK;
-
- switch (LLVMOpcode) {
- default:
- return MCDisassembler::Fail;
- break;
-
- case Hexagon::S4_pstorerdf_abs:
- case Hexagon::S4_pstorerdt_abs:
- case Hexagon::S4_pstorerdfnew_abs:
- case Hexagon::S4_pstorerdtnew_abs:
- // op: Pv
- Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 12) & UINT64_C(48);
- Value |= (insn >> 3) & UINT64_C(15);
- MI.addOperand(MCOperand::createImm(Value));
- // op: Rtt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- case Hexagon::S4_pstorerbnewf_abs:
- case Hexagon::S4_pstorerbnewt_abs:
- case Hexagon::S4_pstorerbnewfnew_abs:
- case Hexagon::S4_pstorerbnewtnew_abs:
- case Hexagon::S4_pstorerhnewf_abs:
- case Hexagon::S4_pstorerhnewt_abs:
- case Hexagon::S4_pstorerhnewfnew_abs:
- case Hexagon::S4_pstorerhnewtnew_abs:
- case Hexagon::S4_pstorerinewf_abs:
- case Hexagon::S4_pstorerinewt_abs:
- case Hexagon::S4_pstorerinewfnew_abs:
- case Hexagon::S4_pstorerinewtnew_abs:
- // op: Pv
- Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 12) & UINT64_C(48);
- Value |= (insn >> 3) & UINT64_C(15);
- MI.addOperand(MCOperand::createImm(Value));
- // op: Nt
- Value = (insn >> 8) & UINT64_C(7);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- case Hexagon::S4_pstorerbf_abs:
- case Hexagon::S4_pstorerbt_abs:
- case Hexagon::S4_pstorerbfnew_abs:
- case Hexagon::S4_pstorerbtnew_abs:
- case Hexagon::S4_pstorerhf_abs:
- case Hexagon::S4_pstorerht_abs:
- case Hexagon::S4_pstorerhfnew_abs:
- case Hexagon::S4_pstorerhtnew_abs:
- case Hexagon::S4_pstorerif_abs:
- case Hexagon::S4_pstorerit_abs:
- case Hexagon::S4_pstorerifnew_abs:
- case Hexagon::S4_pstoreritnew_abs:
- // op: Pv
- Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 12) & UINT64_C(48);
- Value |= (insn >> 3) & UINT64_C(15);
- MI.addOperand(MCOperand::createImm(Value));
- // op: Rt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- case Hexagon::L4_ploadrdf_abs:
- case Hexagon::L4_ploadrdt_abs:
- case Hexagon::L4_ploadrdfnew_abs:
- case Hexagon::L4_ploadrdtnew_abs:
- // op: Rdd
- Value = insn & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- // op: Pt
- Value = ((insn >> 9) & UINT64_C(3));
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = ((insn >> 15) & UINT64_C(62));
- Value |= ((insn >> 8) & UINT64_C(1));
- MI.addOperand(MCOperand::createImm(Value));
- break;
-
- case Hexagon::L4_ploadrbf_abs:
- case Hexagon::L4_ploadrbt_abs:
- case Hexagon::L4_ploadrbfnew_abs:
- case Hexagon::L4_ploadrbtnew_abs:
- case Hexagon::L4_ploadrhf_abs:
- case Hexagon::L4_ploadrht_abs:
- case Hexagon::L4_ploadrhfnew_abs:
- case Hexagon::L4_ploadrhtnew_abs:
- case Hexagon::L4_ploadrubf_abs:
- case Hexagon::L4_ploadrubt_abs:
- case Hexagon::L4_ploadrubfnew_abs:
- case Hexagon::L4_ploadrubtnew_abs:
- case Hexagon::L4_ploadruhf_abs:
- case Hexagon::L4_ploadruht_abs:
- case Hexagon::L4_ploadruhfnew_abs:
- case Hexagon::L4_ploadruhtnew_abs:
- case Hexagon::L4_ploadrif_abs:
- case Hexagon::L4_ploadrit_abs:
- case Hexagon::L4_ploadrifnew_abs:
- case Hexagon::L4_ploadritnew_abs:
- // op: Rd
- Value = insn & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- // op: Pt
- Value = (insn >> 9) & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 15) & UINT64_C(62);
- Value |= (insn >> 8) & UINT64_C(1);
- MI.addOperand(MCOperand::createImm(Value));
- break;
-
- // op: g16_2
- case (Hexagon::PS_loadriabs):
- ++shift;
- // op: g16_1
- case Hexagon::PS_loadrhabs:
- case Hexagon::PS_loadruhabs:
- ++shift;
- // op: g16_0
- case Hexagon::PS_loadrbabs:
- case Hexagon::PS_loadrubabs:
- // op: Rd
- Value |= insn & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(511);
- MI.addOperand(MCOperand::createImm(Value << shift));
- break;
-
- case Hexagon::PS_loadrdabs:
- Value = insn & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(511);
- MI.addOperand(MCOperand::createImm(Value << 3));
- break;
-
- case Hexagon::PS_storerdabs:
- // op: g16_3
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(256);
- Value |= insn & UINT64_C(255);
- MI.addOperand(MCOperand::createImm(Value << 3));
- // op: Rtt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- // op: g16_2
- case Hexagon::PS_storerinewabs:
- ++shift;
- // op: g16_1
- case Hexagon::PS_storerhnewabs:
- ++shift;
- // op: g16_0
- case Hexagon::PS_storerbnewabs:
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(256);
- Value |= insn & UINT64_C(255);
- MI.addOperand(MCOperand::createImm(Value << shift));
- // op: Nt
- Value = (insn >> 8) & UINT64_C(7);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- // op: g16_2
- case Hexagon::PS_storeriabs:
- ++shift;
- // op: g16_1
- case Hexagon::PS_storerhabs:
- case Hexagon::PS_storerfabs:
- ++shift;
- // op: g16_0
- case Hexagon::PS_storerbabs:
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(256);
- Value |= insn & UINT64_C(255);
- MI.addOperand(MCOperand::createImm(Value << shift));
- // op: Rt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
- }
- return MCDisassembler::Success;
- }
- return MCDisassembler::Fail;
-}
-
-static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
- void const *Decoder) {
-  // Instruction class for a constant extender: bits 31:28 = 0x0000
- if ((~insn & 0xf0000000) == 0xf0000000) {
- unsigned Value;
- // 27:16 High 12 bits of 26-bit extender.
- Value = (insn & 0x0fff0000) << 4;
- // 13:0 Low 14 bits of 26-bit extender.
- Value |= ((insn & 0x3fff) << 6);
- MI.setOpcode(Hexagon::A4_ext);
- HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder));
- return MCDisassembler::Success;
- }
- return MCDisassembler::Fail;
-}
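
The removed decodeImmext()/fullValue() pair shows how a constant extender contributes a value: the A4_ext word carries the upper 26 bits of the final 32-bit constant (insn bits 27:16 become bits 31:20, insn bits 13:0 become bits 19:6), and the extended instruction supplies the remaining 6 low bits after its alignment shift. A standalone sketch of that bit assembly, assuming plain integers rather than MCInst operands:

#include <cassert>
#include <cstdint>

// Payload of an immediate-extender word, left-justified into bits 31:6.
uint32_t extenderPayload(uint32_t insn) {
  uint32_t hi12 = (insn & 0x0fff0000u) << 4;   // insn bits 27:16 -> 31:20
  uint32_t lo14 = (insn & 0x00003fffu) << 6;   // insn bits 13:0  -> 19:6
  return hi12 | lo14;
}

// Merge the extender payload with the low 6 bits from the extended insn.
uint32_t applyExtender(uint32_t payload, uint32_t insnImm, unsigned align) {
  uint32_t low6 = (insnImm >> align) & 0x3f;
  return payload | low6;
}

int main() {
  // An all-ones payload plus a low part of 0x3f reconstructs 0xffffffff.
  assert(applyExtender(extenderPayload(0x0fff3fffu), 0x3f, 0) == 0xffffffffu);
  return 0;
}
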
-
-// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
-enum subInstBinaryValues {
- SA1_addi_BITS = 0x0000,
- SA1_addi_MASK = 0x1800,
- SA1_addrx_BITS = 0x1800,
- SA1_addrx_MASK = 0x1f00,
- SA1_addsp_BITS = 0x0c00,
- SA1_addsp_MASK = 0x1c00,
- SA1_and1_BITS = 0x1200,
- SA1_and1_MASK = 0x1f00,
- SA1_clrf_BITS = 0x1a70,
- SA1_clrf_MASK = 0x1e70,
- SA1_clrfnew_BITS = 0x1a50,
- SA1_clrfnew_MASK = 0x1e70,
- SA1_clrt_BITS = 0x1a60,
- SA1_clrt_MASK = 0x1e70,
- SA1_clrtnew_BITS = 0x1a40,
- SA1_clrtnew_MASK = 0x1e70,
- SA1_cmpeqi_BITS = 0x1900,
- SA1_cmpeqi_MASK = 0x1f00,
- SA1_combine0i_BITS = 0x1c00,
- SA1_combine0i_MASK = 0x1d18,
- SA1_combine1i_BITS = 0x1c08,
- SA1_combine1i_MASK = 0x1d18,
- SA1_combine2i_BITS = 0x1c10,
- SA1_combine2i_MASK = 0x1d18,
- SA1_combine3i_BITS = 0x1c18,
- SA1_combine3i_MASK = 0x1d18,
- SA1_combinerz_BITS = 0x1d08,
- SA1_combinerz_MASK = 0x1d08,
- SA1_combinezr_BITS = 0x1d00,
- SA1_combinezr_MASK = 0x1d08,
- SA1_dec_BITS = 0x1300,
- SA1_dec_MASK = 0x1f00,
- SA1_inc_BITS = 0x1100,
- SA1_inc_MASK = 0x1f00,
- SA1_seti_BITS = 0x0800,
- SA1_seti_MASK = 0x1c00,
- SA1_setin1_BITS = 0x1a00,
- SA1_setin1_MASK = 0x1e40,
- SA1_sxtb_BITS = 0x1500,
- SA1_sxtb_MASK = 0x1f00,
- SA1_sxth_BITS = 0x1400,
- SA1_sxth_MASK = 0x1f00,
- SA1_tfr_BITS = 0x1000,
- SA1_tfr_MASK = 0x1f00,
- SA1_zxtb_BITS = 0x1700,
- SA1_zxtb_MASK = 0x1f00,
- SA1_zxth_BITS = 0x1600,
- SA1_zxth_MASK = 0x1f00,
- SL1_loadri_io_BITS = 0x0000,
- SL1_loadri_io_MASK = 0x1000,
- SL1_loadrub_io_BITS = 0x1000,
- SL1_loadrub_io_MASK = 0x1000,
- SL2_deallocframe_BITS = 0x1f00,
- SL2_deallocframe_MASK = 0x1fc0,
- SL2_jumpr31_BITS = 0x1fc0,
- SL2_jumpr31_MASK = 0x1fc4,
- SL2_jumpr31_f_BITS = 0x1fc5,
- SL2_jumpr31_f_MASK = 0x1fc7,
- SL2_jumpr31_fnew_BITS = 0x1fc7,
- SL2_jumpr31_fnew_MASK = 0x1fc7,
- SL2_jumpr31_t_BITS = 0x1fc4,
- SL2_jumpr31_t_MASK = 0x1fc7,
- SL2_jumpr31_tnew_BITS = 0x1fc6,
- SL2_jumpr31_tnew_MASK = 0x1fc7,
- SL2_loadrb_io_BITS = 0x1000,
- SL2_loadrb_io_MASK = 0x1800,
- SL2_loadrd_sp_BITS = 0x1e00,
- SL2_loadrd_sp_MASK = 0x1f00,
- SL2_loadrh_io_BITS = 0x0000,
- SL2_loadrh_io_MASK = 0x1800,
- SL2_loadri_sp_BITS = 0x1c00,
- SL2_loadri_sp_MASK = 0x1e00,
- SL2_loadruh_io_BITS = 0x0800,
- SL2_loadruh_io_MASK = 0x1800,
- SL2_return_BITS = 0x1f40,
- SL2_return_MASK = 0x1fc4,
- SL2_return_f_BITS = 0x1f45,
- SL2_return_f_MASK = 0x1fc7,
- SL2_return_fnew_BITS = 0x1f47,
- SL2_return_fnew_MASK = 0x1fc7,
- SL2_return_t_BITS = 0x1f44,
- SL2_return_t_MASK = 0x1fc7,
- SL2_return_tnew_BITS = 0x1f46,
- SL2_return_tnew_MASK = 0x1fc7,
- SS1_storeb_io_BITS = 0x1000,
- SS1_storeb_io_MASK = 0x1000,
- SS1_storew_io_BITS = 0x0000,
- SS1_storew_io_MASK = 0x1000,
- SS2_allocframe_BITS = 0x1c00,
- SS2_allocframe_MASK = 0x1e00,
- SS2_storebi0_BITS = 0x1200,
- SS2_storebi0_MASK = 0x1f00,
- SS2_storebi1_BITS = 0x1300,
- SS2_storebi1_MASK = 0x1f00,
- SS2_stored_sp_BITS = 0x0a00,
- SS2_stored_sp_MASK = 0x1e00,
- SS2_storeh_io_BITS = 0x0000,
- SS2_storeh_io_MASK = 0x1800,
- SS2_storew_sp_BITS = 0x0800,
- SS2_storew_sp_MASK = 0x1e00,
- SS2_storewi0_BITS = 0x1000,
- SS2_storewi0_MASK = 0x1f00,
- SS2_storewi1_BITS = 0x1100,
- SS2_storewi1_MASK = 0x1f00
-};
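
The removed GetSubinstOpcode() below classifies a 13-bit duplex slot by checking (inst & MASK) == BITS against the entries above, one instruction class at a time. A standalone sketch of that matching scheme using two entries copied from the enum (the rest of the table is omitted):

#include <cstdint>
#include <cstdio>

struct SubinstPattern { uint16_t bits, mask; const char *name; };

// SL1 class patterns, taken from the SL1_* BITS/MASK values above.
static const SubinstPattern L1Patterns[] = {
    {0x0000, 0x1000, "SL1_loadri_io"},
    {0x1000, 0x1000, "SL1_loadrub_io"},
};

const char *classifyL1(uint16_t inst) {
  for (const auto &P : L1Patterns)
    if ((inst & P.mask) == P.bits)
      return P.name;
  return "<unknown subinstruction>";
}

int main() {
  std::printf("%s\n", classifyL1(0x1234));  // bit 12 set -> SL1_loadrub_io
  return 0;
}
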
-static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
- raw_ostream &os) {
- switch (IClass) {
- case HexagonII::HSIG_L1:
- if ((inst & SL1_loadri_io_MASK) == SL1_loadri_io_BITS)
- op = Hexagon::SL1_loadri_io;
- else if ((inst & SL1_loadrub_io_MASK) == SL1_loadrub_io_BITS)
- op = Hexagon::SL1_loadrub_io;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_L2:
- if ((inst & SL2_deallocframe_MASK) == SL2_deallocframe_BITS)
- op = Hexagon::SL2_deallocframe;
- else if ((inst & SL2_jumpr31_MASK) == SL2_jumpr31_BITS)
- op = Hexagon::SL2_jumpr31;
- else if ((inst & SL2_jumpr31_f_MASK) == SL2_jumpr31_f_BITS)
- op = Hexagon::SL2_jumpr31_f;
- else if ((inst & SL2_jumpr31_fnew_MASK) == SL2_jumpr31_fnew_BITS)
- op = Hexagon::SL2_jumpr31_fnew;
- else if ((inst & SL2_jumpr31_t_MASK) == SL2_jumpr31_t_BITS)
- op = Hexagon::SL2_jumpr31_t;
- else if ((inst & SL2_jumpr31_tnew_MASK) == SL2_jumpr31_tnew_BITS)
- op = Hexagon::SL2_jumpr31_tnew;
- else if ((inst & SL2_loadrb_io_MASK) == SL2_loadrb_io_BITS)
- op = Hexagon::SL2_loadrb_io;
- else if ((inst & SL2_loadrd_sp_MASK) == SL2_loadrd_sp_BITS)
- op = Hexagon::SL2_loadrd_sp;
- else if ((inst & SL2_loadrh_io_MASK) == SL2_loadrh_io_BITS)
- op = Hexagon::SL2_loadrh_io;
- else if ((inst & SL2_loadri_sp_MASK) == SL2_loadri_sp_BITS)
- op = Hexagon::SL2_loadri_sp;
- else if ((inst & SL2_loadruh_io_MASK) == SL2_loadruh_io_BITS)
- op = Hexagon::SL2_loadruh_io;
- else if ((inst & SL2_return_MASK) == SL2_return_BITS)
- op = Hexagon::SL2_return;
- else if ((inst & SL2_return_f_MASK) == SL2_return_f_BITS)
- op = Hexagon::SL2_return_f;
- else if ((inst & SL2_return_fnew_MASK) == SL2_return_fnew_BITS)
- op = Hexagon::SL2_return_fnew;
- else if ((inst & SL2_return_t_MASK) == SL2_return_t_BITS)
- op = Hexagon::SL2_return_t;
- else if ((inst & SL2_return_tnew_MASK) == SL2_return_tnew_BITS)
- op = Hexagon::SL2_return_tnew;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_A:
- if ((inst & SA1_addi_MASK) == SA1_addi_BITS)
- op = Hexagon::SA1_addi;
- else if ((inst & SA1_addrx_MASK) == SA1_addrx_BITS)
- op = Hexagon::SA1_addrx;
- else if ((inst & SA1_addsp_MASK) == SA1_addsp_BITS)
- op = Hexagon::SA1_addsp;
- else if ((inst & SA1_and1_MASK) == SA1_and1_BITS)
- op = Hexagon::SA1_and1;
- else if ((inst & SA1_clrf_MASK) == SA1_clrf_BITS)
- op = Hexagon::SA1_clrf;
- else if ((inst & SA1_clrfnew_MASK) == SA1_clrfnew_BITS)
- op = Hexagon::SA1_clrfnew;
- else if ((inst & SA1_clrt_MASK) == SA1_clrt_BITS)
- op = Hexagon::SA1_clrt;
- else if ((inst & SA1_clrtnew_MASK) == SA1_clrtnew_BITS)
- op = Hexagon::SA1_clrtnew;
- else if ((inst & SA1_cmpeqi_MASK) == SA1_cmpeqi_BITS)
- op = Hexagon::SA1_cmpeqi;
- else if ((inst & SA1_combine0i_MASK) == SA1_combine0i_BITS)
- op = Hexagon::SA1_combine0i;
- else if ((inst & SA1_combine1i_MASK) == SA1_combine1i_BITS)
- op = Hexagon::SA1_combine1i;
- else if ((inst & SA1_combine2i_MASK) == SA1_combine2i_BITS)
- op = Hexagon::SA1_combine2i;
- else if ((inst & SA1_combine3i_MASK) == SA1_combine3i_BITS)
- op = Hexagon::SA1_combine3i;
- else if ((inst & SA1_combinerz_MASK) == SA1_combinerz_BITS)
- op = Hexagon::SA1_combinerz;
- else if ((inst & SA1_combinezr_MASK) == SA1_combinezr_BITS)
- op = Hexagon::SA1_combinezr;
- else if ((inst & SA1_dec_MASK) == SA1_dec_BITS)
- op = Hexagon::SA1_dec;
- else if ((inst & SA1_inc_MASK) == SA1_inc_BITS)
- op = Hexagon::SA1_inc;
- else if ((inst & SA1_seti_MASK) == SA1_seti_BITS)
- op = Hexagon::SA1_seti;
- else if ((inst & SA1_setin1_MASK) == SA1_setin1_BITS)
- op = Hexagon::SA1_setin1;
- else if ((inst & SA1_sxtb_MASK) == SA1_sxtb_BITS)
- op = Hexagon::SA1_sxtb;
- else if ((inst & SA1_sxth_MASK) == SA1_sxth_BITS)
- op = Hexagon::SA1_sxth;
- else if ((inst & SA1_tfr_MASK) == SA1_tfr_BITS)
- op = Hexagon::SA1_tfr;
- else if ((inst & SA1_zxtb_MASK) == SA1_zxtb_BITS)
- op = Hexagon::SA1_zxtb;
- else if ((inst & SA1_zxth_MASK) == SA1_zxth_BITS)
- op = Hexagon::SA1_zxth;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_S1:
- if ((inst & SS1_storeb_io_MASK) == SS1_storeb_io_BITS)
- op = Hexagon::SS1_storeb_io;
- else if ((inst & SS1_storew_io_MASK) == SS1_storew_io_BITS)
- op = Hexagon::SS1_storew_io;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_S2:
- if ((inst & SS2_allocframe_MASK) == SS2_allocframe_BITS)
- op = Hexagon::SS2_allocframe;
- else if ((inst & SS2_storebi0_MASK) == SS2_storebi0_BITS)
- op = Hexagon::SS2_storebi0;
- else if ((inst & SS2_storebi1_MASK) == SS2_storebi1_BITS)
- op = Hexagon::SS2_storebi1;
- else if ((inst & SS2_stored_sp_MASK) == SS2_stored_sp_BITS)
- op = Hexagon::SS2_stored_sp;
- else if ((inst & SS2_storeh_io_MASK) == SS2_storeh_io_BITS)
- op = Hexagon::SS2_storeh_io;
- else if ((inst & SS2_storew_sp_MASK) == SS2_storew_sp_BITS)
- op = Hexagon::SS2_storew_sp;
- else if ((inst & SS2_storewi0_MASK) == SS2_storewi0_BITS)
- op = Hexagon::SS2_storewi0;
- else if ((inst & SS2_storewi1_MASK) == SS2_storewi1_BITS)
- op = Hexagon::SS2_storewi1;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- default:
- os << "<unknown>";
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
-}
-
-static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) {
- if (encoded_reg < 8)
- return Hexagon::R0 + encoded_reg;
- else if (encoded_reg < 16)
- return Hexagon::R0 + encoded_reg + 8;
-
- // patently false value
- return Hexagon::NoRegister;
-}
-
-static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) {
- if (encoded_dreg < 4)
- return Hexagon::D0 + encoded_dreg;
- else if (encoded_dreg < 8)
- return Hexagon::D0 + encoded_dreg + 4;
-
- // patently false value
- return Hexagon::NoRegister;
-}
-
-void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
- unsigned inst) const {
- int64_t operand;
- MCOperand Op;
- switch (opcode) {
- case Hexagon::SL2_deallocframe:
- case Hexagon::SL2_jumpr31:
- case Hexagon::SL2_jumpr31_f:
- case Hexagon::SL2_jumpr31_fnew:
- case Hexagon::SL2_jumpr31_t:
- case Hexagon::SL2_jumpr31_tnew:
- case Hexagon::SL2_return:
- case Hexagon::SL2_return_f:
- case Hexagon::SL2_return_fnew:
- case Hexagon::SL2_return_t:
- case Hexagon::SL2_return_tnew:
- // no operands for these instructions
- break;
- case Hexagon::SS2_allocframe:
- // u 8-4{5_3}
- operand = ((inst & 0x1f0) >> 4) << 3;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL1_loadri_io:
- // Rd 3-0, Rs 7-4, u 11-8{4_2}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf00) >> 6;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL1_loadrub_io:
- // Rd 3-0, Rs 7-4, u 11-8
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf00) >> 8;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadrb_io:
- // Rd 3-0, Rs 7-4, u 10-8
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0x700) >> 8;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadrh_io:
- case Hexagon::SL2_loadruh_io:
- // Rd 3-0, Rs 7-4, u 10-8{3_1}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x700) >> 8) << 1;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadrd_sp:
- // Rdd 2-0, u 7-3{5_3}
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x0f8) >> 3) << 3;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadri_sp:
- // Rd 3-0, u 8-4{5_2}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x1f0) >> 4) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_addi:
- // Rx 3-0 (x2), s7 10-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- MI->addOperand(Op);
- operand = SignExtend64<7>((inst & 0x7f0) >> 4);
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_addrx:
- // Rx 3-0 (x2), Rs 7-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SA1_and1:
- case Hexagon::SA1_dec:
- case Hexagon::SA1_inc:
- case Hexagon::SA1_sxtb:
- case Hexagon::SA1_sxth:
- case Hexagon::SA1_tfr:
- case Hexagon::SA1_zxtb:
- case Hexagon::SA1_zxth:
- // Rd 3-0, Rs 7-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SA1_addsp:
- // Rd 3-0, u 9-4{6_2}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x3f0) >> 4) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_seti:
- // Rd 3-0, u 9-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0x3f0) >> 4;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_clrf:
- case Hexagon::SA1_clrfnew:
- case Hexagon::SA1_clrt:
- case Hexagon::SA1_clrtnew:
- case Hexagon::SA1_setin1:
- // Rd 3-0
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- if (opcode == Hexagon::SA1_setin1)
- break;
- MI->addOperand(MCOperand::createReg(Hexagon::P0));
- break;
- case Hexagon::SA1_cmpeqi:
- // Rs 7-4, u 1-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = inst & 0x3;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_combine0i:
- case Hexagon::SA1_combine1i:
- case Hexagon::SA1_combine2i:
- case Hexagon::SA1_combine3i:
- // Rdd 2-0, u 6-5
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0x060) >> 5;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_combinerz:
- case Hexagon::SA1_combinezr:
- // Rdd 2-0, Rs 7-4
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS1_storeb_io:
- // Rs 7-4, u 11-8, Rt 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf00) >> 8;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS1_storew_io:
- // Rs 7-4, u 11-8{4_2}, Rt 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0xf00) >> 8) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS2_storebi0:
- case Hexagon::SS2_storebi1:
- // Rs 7-4, u 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = inst & 0xf;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SS2_storewi0:
- case Hexagon::SS2_storewi1:
- // Rs 7-4, u 3-0{4_2}
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SS2_stored_sp:
- // s 8-3{6_3}, Rtt 2-0
- operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS2_storeh_io:
- // Rs 7-4, u 10-8{3_1}, Rt 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x700) >> 8) << 1;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS2_storew_sp:
- // u 8-4{5_2}, Rd 3-0
- operand = ((inst & 0x1f0) >> 4) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- default:
- // don't crash with an invalid subinstruction
- // llvm_unreachable("Invalid subinstruction in duplex instruction");
- break;
- }
-}
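
The removed addSubinstOperands() above extracts each duplex sub-instruction's operands from fixed bit ranges. As one worked example, SL1_loadri_io encodes Rd in bits 3:0, Rs in bits 7:4, and a 4-bit word-scaled offset in bits 11:8. A standalone sketch of just that case, returning plain sub-register codes and a byte offset instead of building MCOperands:

#include <cassert>
#include <cstdint>

struct SL1LoadRiIo { unsigned rd, rs, byteOffset; };

// Decode the "Rd = memw(Rs + #u4:2)" fields from a 13-bit duplex slot.
SL1LoadRiIo decodeSL1LoadRiIo(uint16_t inst) {
  SL1LoadRiIo Op;
  Op.rd = inst & 0xf;
  Op.rs = (inst >> 4) & 0xf;
  Op.byteOffset = ((inst >> 8) & 0xf) << 2;   // u4 scaled to a word offset
  return Op;
}

int main() {
  SL1LoadRiIo Op = decodeSL1LoadRiIo(0x0321);  // u4 = 3, Rs = 2, Rd = 1
  assert(Op.rd == 1 && Op.rs == 2 && Op.byteOffset == 12);
  return 0;
}
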
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 0b2b46387b6a..4767165141a3 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -22,14 +22,12 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// Hexagon Architectures
-def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">;
-def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
-def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">;
-def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">;
+include "HexagonDepArch.td"
-def FeatureHVX: SubtargetFeature<"hvx", "UseHVXOps", "true",
+// Hexagon ISA Extensions
+def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", "true",
"Hexagon HVX instructions">;
-def FeatureHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", "true",
+def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", "true",
"Hexagon HVX Double instructions">;
def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true",
"Use constant-extended calls">;
@@ -37,19 +35,14 @@ def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true",
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV5T : Predicate<"HST->hasV5TOps()">;
-def NoV5T : Predicate<"!HST->hasV5TOps()">;
-def HasV55T : Predicate<"HST->hasV55TOps()">,
- AssemblerPredicate<"ArchV55">;
-def HasV60T : Predicate<"HST->hasV60TOps()">,
- AssemblerPredicate<"ArchV60">;
+
def UseMEMOP : Predicate<"HST->useMemOps()">;
def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
def UseHVXDbl : Predicate<"HST->useHVXDblOps()">,
- AssemblerPredicate<"FeatureHVXDbl">;
+ AssemblerPredicate<"ExtensionHVXDbl">;
def UseHVXSgl : Predicate<"HST->useHVXSglOps()">;
def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">,
- AssemblerPredicate<"FeatureHVX">;
+ AssemblerPredicate<"ExtensionHVX">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -81,7 +74,7 @@ class IntrinsicsRel;
def getPredOpcode : InstrMapping {
let FilterClass = "PredRel";
// Instructions with the same BaseOpcode and isNVStore values form a row.
- let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"];
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isBrTaken", "isNT"];
// Instructions with the same predicate sense form a column.
let ColFields = ["PredSense"];
// The key column is the unpredicated instructions.
@@ -132,7 +125,7 @@ def getPredNewOpcode : InstrMapping {
//
def getPredOldOpcode : InstrMapping {
let FilterClass = "PredNewRel";
- let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"];
let ColFields = ["PNewValue"];
let KeyCol = ["new"];
let ValueCols = [[""]];
@@ -248,11 +241,18 @@ def getRealHWInstr : InstrMapping {
//===----------------------------------------------------------------------===//
include "HexagonSchedule.td"
include "HexagonRegisterInfo.td"
-include "HexagonCallingConv.td"
-include "HexagonInstrInfo.td"
+include "HexagonOperands.td"
+include "HexagonDepOperands.td"
+include "HexagonDepITypes.td"
+include "HexagonInstrFormats.td"
+include "HexagonDepInstrFormats.td"
+include "HexagonDepInstrInfo.td"
+include "HexagonPseudo.td"
include "HexagonPatterns.td"
+include "HexagonDepMappings.td"
include "HexagonIntrinsics.td"
include "HexagonIntrinsicsDerived.td"
+include "HexagonMapAsm2IntrinV62.gen.td"
def HexagonInstrInfo : InstrInfo;
@@ -271,7 +271,9 @@ def : Proc<"hexagonv5", HexagonModelV4,
def : Proc<"hexagonv55", HexagonModelV55,
[ArchV4, ArchV5, ArchV55]>;
def : Proc<"hexagonv60", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60, FeatureHVX]>;
+ [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>;
+def : Proc<"hexagonv62", HexagonModelV62,
+ [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ExtensionHVX]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 54db5ad4374b..fda23f8f6b05 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -261,10 +261,34 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
return Sym;
}
+static MCInst ScaleVectorOffset(MCInst &Inst, unsigned OpNo,
+ unsigned VectorSize, MCContext &Ctx) {
+ MCInst T;
+ T.setOpcode(Inst.getOpcode());
+ for (unsigned i = 0, n = Inst.getNumOperands(); i != n; ++i) {
+ if (i != OpNo) {
+ T.addOperand(Inst.getOperand(i));
+ continue;
+ }
+ MCOperand &ImmOp = Inst.getOperand(i);
+ const auto *HE = static_cast<const HexagonMCExpr*>(ImmOp.getExpr());
+ int32_t V = cast<MCConstantExpr>(HE->getExpr())->getValue();
+ auto *NewCE = MCConstantExpr::create(V / int32_t(VectorSize), Ctx);
+ auto *NewHE = HexagonMCExpr::create(NewCE, Ctx);
+ T.addOperand(MCOperand::createExpr(NewHE));
+ }
+ return T;
+}
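
ScaleVectorOffset() above rewrites one immediate operand so the printed offset counts whole HVX vectors: the byte offset carried in the MCInst is divided by the vector register size (64 bytes in single mode, 128 bytes in 128B mode). A standalone sketch of only that arithmetic, leaving out the MCExpr rewrapping:

#include <cassert>
#include <cstdint>

// Convert a byte offset into a count of whole HVX vectors.
int32_t scaleVectorOffset(int32_t byteOffset, unsigned vectorSize) {
  assert((vectorSize == 64 || vectorSize == 128) && "HVX vector size");
  return byteOffset / (int32_t)vectorSize;
}

int main() {
  assert(scaleVectorOffset(-256, 64) == -4);   // vmem(...+#-4) in 64B mode
  assert(scaleVectorOffset(-256, 128) == -2);  // vmem(...+#-2) in 128B mode
  return 0;
}
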
+
void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
const MachineInstr &MI) {
MCInst &MappedInst = static_cast <MCInst &>(Inst);
const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ unsigned VectorSize = HST.useHVXSglOps()
+ ? Hexagon::VectorRegsRegClass.getSize()
+ : Hexagon::VectorRegs128BRegClass.getSize();
switch (Inst.getOpcode()) {
default: return;
@@ -282,6 +306,36 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
break;
}
+ case Hexagon::A2_tfrf: {
+ Inst.setOpcode(Hexagon::A2_paddif);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_tfrt: {
+ Inst.setOpcode(Hexagon::A2_paddit);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_tfrfnew: {
+ Inst.setOpcode(Hexagon::A2_paddifnew);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_tfrtnew: {
+ Inst.setOpcode(Hexagon::A2_padditnew);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_zxtb: {
+ Inst.setOpcode(Hexagon::A2_andir);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, OutContext)));
+ break;
+ }
+
// "$dst = CONST64(#$src1)",
case Hexagon::CONST64:
if (!OutStreamer->hasRawTextSupport()) {
@@ -376,6 +430,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI));
return;
}
+ case Hexagon::PS_call_nr:
+ Inst.setOpcode(Hexagon::J2_call);
+ break;
case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
MCOperand &MO = MappedInst.getOperand(2);
int64_t Imm;
@@ -564,6 +621,181 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
return;
}
+ case Hexagon::V6_vL32Ub_pi:
+ case Hexagon::V6_vL32b_cur_pi:
+ case Hexagon::V6_vL32b_nt_cur_pi:
+ case Hexagon::V6_vL32b_pi:
+ case Hexagon::V6_vL32b_nt_pi:
+ case Hexagon::V6_vL32b_nt_tmp_pi:
+ case Hexagon::V6_vL32b_tmp_pi:
+ case Hexagon::V6_vL32Ub_pi_128B:
+ case Hexagon::V6_vL32b_cur_pi_128B:
+ case Hexagon::V6_vL32b_nt_cur_pi_128B:
+ case Hexagon::V6_vL32b_pi_128B:
+ case Hexagon::V6_vL32b_nt_pi_128B:
+ case Hexagon::V6_vL32b_nt_tmp_pi_128B:
+ case Hexagon::V6_vL32b_tmp_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 3, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vL32Ub_ai:
+ case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vL32b_cur_ai:
+ case Hexagon::V6_vL32b_nt_ai:
+ case Hexagon::V6_vL32b_nt_cur_ai:
+ case Hexagon::V6_vL32b_nt_tmp_ai:
+ case Hexagon::V6_vL32b_tmp_ai:
+ case Hexagon::V6_vL32Ub_ai_128B:
+ case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vL32b_cur_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ case Hexagon::V6_vL32b_nt_cur_ai_128B:
+ case Hexagon::V6_vL32b_nt_tmp_ai_128B:
+ case Hexagon::V6_vL32b_tmp_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_pi:
+ case Hexagon::V6_vS32b_new_pi:
+ case Hexagon::V6_vS32b_nt_new_pi:
+ case Hexagon::V6_vS32b_nt_pi:
+ case Hexagon::V6_vS32b_pi:
+ case Hexagon::V6_vS32Ub_pi_128B:
+ case Hexagon::V6_vS32b_new_pi_128B:
+ case Hexagon::V6_vS32b_nt_new_pi_128B:
+ case Hexagon::V6_vS32b_nt_pi_128B:
+ case Hexagon::V6_vS32b_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_ai:
+ case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vS32b_new_ai:
+ case Hexagon::V6_vS32b_nt_ai:
+ case Hexagon::V6_vS32b_nt_new_ai:
+ case Hexagon::V6_vS32Ub_ai_128B:
+ case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vS32b_new_ai_128B:
+ case Hexagon::V6_vS32b_nt_ai_128B:
+ case Hexagon::V6_vS32b_nt_new_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 1, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vL32b_cur_npred_pi:
+ case Hexagon::V6_vL32b_cur_pred_pi:
+ case Hexagon::V6_vL32b_npred_pi:
+ case Hexagon::V6_vL32b_nt_cur_npred_pi:
+ case Hexagon::V6_vL32b_nt_cur_pred_pi:
+ case Hexagon::V6_vL32b_nt_npred_pi:
+ case Hexagon::V6_vL32b_nt_pred_pi:
+ case Hexagon::V6_vL32b_nt_tmp_npred_pi:
+ case Hexagon::V6_vL32b_nt_tmp_pred_pi:
+ case Hexagon::V6_vL32b_pred_pi:
+ case Hexagon::V6_vL32b_tmp_npred_pi:
+ case Hexagon::V6_vL32b_tmp_pred_pi:
+ case Hexagon::V6_vL32b_cur_npred_pi_128B:
+ case Hexagon::V6_vL32b_cur_pred_pi_128B:
+ case Hexagon::V6_vL32b_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_cur_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_cur_pred_pi_128B:
+ case Hexagon::V6_vL32b_nt_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_pred_pi_128B:
+ case Hexagon::V6_vL32b_nt_tmp_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_tmp_pred_pi_128B:
+ case Hexagon::V6_vL32b_pred_pi_128B:
+ case Hexagon::V6_vL32b_tmp_npred_pi_128B:
+ case Hexagon::V6_vL32b_tmp_pred_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 4, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vL32b_cur_npred_ai:
+ case Hexagon::V6_vL32b_cur_pred_ai:
+ case Hexagon::V6_vL32b_npred_ai:
+ case Hexagon::V6_vL32b_nt_cur_npred_ai:
+ case Hexagon::V6_vL32b_nt_cur_pred_ai:
+ case Hexagon::V6_vL32b_nt_npred_ai:
+ case Hexagon::V6_vL32b_nt_pred_ai:
+ case Hexagon::V6_vL32b_nt_tmp_npred_ai:
+ case Hexagon::V6_vL32b_nt_tmp_pred_ai:
+ case Hexagon::V6_vL32b_pred_ai:
+ case Hexagon::V6_vL32b_tmp_npred_ai:
+ case Hexagon::V6_vL32b_tmp_pred_ai:
+ case Hexagon::V6_vL32b_cur_npred_ai_128B:
+ case Hexagon::V6_vL32b_cur_pred_ai_128B:
+ case Hexagon::V6_vL32b_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_cur_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_cur_pred_ai_128B:
+ case Hexagon::V6_vL32b_nt_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_pred_ai_128B:
+ case Hexagon::V6_vL32b_nt_tmp_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_tmp_pred_ai_128B:
+ case Hexagon::V6_vL32b_pred_ai_128B:
+ case Hexagon::V6_vL32b_tmp_npred_ai_128B:
+ case Hexagon::V6_vL32b_tmp_pred_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 3, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_npred_pi:
+ case Hexagon::V6_vS32Ub_pred_pi:
+ case Hexagon::V6_vS32b_new_npred_pi:
+ case Hexagon::V6_vS32b_new_pred_pi:
+ case Hexagon::V6_vS32b_npred_pi:
+ case Hexagon::V6_vS32b_nqpred_pi:
+ case Hexagon::V6_vS32b_nt_new_npred_pi:
+ case Hexagon::V6_vS32b_nt_new_pred_pi:
+ case Hexagon::V6_vS32b_nt_npred_pi:
+ case Hexagon::V6_vS32b_nt_nqpred_pi:
+ case Hexagon::V6_vS32b_nt_pred_pi:
+ case Hexagon::V6_vS32b_nt_qpred_pi:
+ case Hexagon::V6_vS32b_pred_pi:
+ case Hexagon::V6_vS32b_qpred_pi:
+ case Hexagon::V6_vS32Ub_npred_pi_128B:
+ case Hexagon::V6_vS32Ub_pred_pi_128B:
+ case Hexagon::V6_vS32b_new_npred_pi_128B:
+ case Hexagon::V6_vS32b_new_pred_pi_128B:
+ case Hexagon::V6_vS32b_npred_pi_128B:
+ case Hexagon::V6_vS32b_nqpred_pi_128B:
+ case Hexagon::V6_vS32b_nt_new_npred_pi_128B:
+ case Hexagon::V6_vS32b_nt_new_pred_pi_128B:
+ case Hexagon::V6_vS32b_nt_npred_pi_128B:
+ case Hexagon::V6_vS32b_nt_nqpred_pi_128B:
+ case Hexagon::V6_vS32b_nt_pred_pi_128B:
+ case Hexagon::V6_vS32b_nt_qpred_pi_128B:
+ case Hexagon::V6_vS32b_pred_pi_128B:
+ case Hexagon::V6_vS32b_qpred_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 3, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_npred_ai:
+ case Hexagon::V6_vS32Ub_pred_ai:
+ case Hexagon::V6_vS32b_new_npred_ai:
+ case Hexagon::V6_vS32b_new_pred_ai:
+ case Hexagon::V6_vS32b_npred_ai:
+ case Hexagon::V6_vS32b_nqpred_ai:
+ case Hexagon::V6_vS32b_nt_new_npred_ai:
+ case Hexagon::V6_vS32b_nt_new_pred_ai:
+ case Hexagon::V6_vS32b_nt_npred_ai:
+ case Hexagon::V6_vS32b_nt_nqpred_ai:
+ case Hexagon::V6_vS32b_nt_pred_ai:
+ case Hexagon::V6_vS32b_nt_qpred_ai:
+ case Hexagon::V6_vS32b_pred_ai:
+ case Hexagon::V6_vS32b_qpred_ai:
+ case Hexagon::V6_vS32Ub_npred_ai_128B:
+ case Hexagon::V6_vS32Ub_pred_ai_128B:
+ case Hexagon::V6_vS32b_new_npred_ai_128B:
+ case Hexagon::V6_vS32b_new_pred_ai_128B:
+ case Hexagon::V6_vS32b_npred_ai_128B:
+ case Hexagon::V6_vS32b_nqpred_ai_128B:
+ case Hexagon::V6_vS32b_nt_new_npred_ai_128B:
+ case Hexagon::V6_vS32b_nt_new_pred_ai_128B:
+ case Hexagon::V6_vS32b_nt_npred_ai_128B:
+ case Hexagon::V6_vS32b_nt_nqpred_ai_128B:
+ case Hexagon::V6_vS32b_nt_pred_ai_128B:
+ case Hexagon::V6_vS32b_nt_qpred_ai_128B:
+ case Hexagon::V6_vS32b_pred_ai_128B:
+ case Hexagon::V6_vS32b_qpred_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext);
+ return;
}
}
@@ -578,13 +810,9 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MI->isBundle()) {
const MachineBasicBlock* MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
- unsigned IgnoreCount = 0;
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
- MII->getOpcode() == TargetOpcode::IMPLICIT_DEF)
- ++IgnoreCount;
- else
+ if (!MII->isDebugValue() && !MII->isImplicitDef())
HexagonLowerToMC(MCII, &*MII, MCB, *this);
}
else
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index fe7278fde1b1..61f290ca98d7 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -46,6 +46,17 @@ using namespace llvm;
static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
cl::init(true), cl::desc("Preserve subregisters in tied operands"));
+static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
+ cl::init(true), cl::desc("Generate extract instructions"));
+static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
+ cl::init(true), cl::desc("Generate bitsplit instructions"));
+
+static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
+ cl::init(UINT_MAX));
+static unsigned CountExtract = 0;
+static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
+ cl::init(UINT_MAX));
+static unsigned CountBitSplit = 0;
namespace llvm {
@@ -249,8 +260,6 @@ INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
RegisterSet &AVs) {
- MachineDomTreeNode *N = MDT->getNode(&B);
- typedef GraphTraits<MachineDomTreeNode*> GTN;
bool Changed = false;
if (T.TopDown)
@@ -262,10 +271,9 @@ bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
RegisterSet NewAVs = AVs;
NewAVs.insert(Defs);
- for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
- MachineBasicBlock *SB = (*I)->getBlock();
- Changed |= visitBlock(*SB, T, NewAVs);
- }
+ for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
+ Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);
+
if (!T.TopDown)
Changed |= T.processBlock(B, AVs);
@@ -896,6 +904,7 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
*MRI.getTargetRegisterInfo());
auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
+ (void)HRI;
assert(Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo) ||
Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi));
};
@@ -983,9 +992,9 @@ bool DeadCodeElimination::isDead(unsigned R) const {
bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
bool Changed = false;
- typedef GraphTraits<MachineDomTreeNode*> GTN;
- for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
- Changed |= runOnNode(*I);
+
+ for (auto *DTN : children<MachineDomTreeNode*>(N))
+ Changed |= runOnNode(DTN);
MachineBasicBlock *B = N->getBlock();
std::vector<MachineInstr*> Instrs;
@@ -1735,10 +1744,11 @@ namespace {
// This is by no means complete
class BitSimplification : public Transformation {
public:
- BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
- const HexagonRegisterInfo &hri, MachineRegisterInfo &mri,
- MachineFunction &mf)
- : Transformation(true), HII(hii), HRI(hri), MRI(mri), MF(mf), BT(bt) {}
+ BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
+ const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
+ MachineRegisterInfo &mri, MachineFunction &mf)
+ : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
+ MF(mf), BT(bt) {}
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
@@ -1765,9 +1775,18 @@ namespace {
const BitTracker::RegisterCell &RC);
bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC);
+ bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC);
+ bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
+
+ // Cache of created instructions to avoid creating duplicates.
+ // XXX Currently only used by genBitSplit.
+ std::vector<MachineInstr*> NewMIs;
+ const MachineDominatorTree &MDT;
const HexagonInstrInfo &HII;
const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
@@ -2149,6 +2168,146 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
return false;
}
+bool BitSimplification::genBitSplit(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
+ const RegisterSet &AVs) {
+ if (!GenBitSplit)
+ return false;
+ if (CountBitSplit >= MaxBitSplit)
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A4_bitsplit:
+ case Hexagon::A4_bitspliti:
+ return false;
+ }
+
+ unsigned W = RC.width();
+ if (W != 32)
+ return false;
+
+ auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
+ unsigned Z = C.width();
+ while (Z > 0 && C[Z-1].is(0))
+ --Z;
+ return C.width() - Z;
+ };
+
+ // Count the number of leading zeros in the target RC.
+ unsigned Z = ctlz(RC);
+ if (Z == 0 || Z == W)
+ return false;
+
+ // A simplistic analysis: assume the source register (the one being split)
+ // is fully unknown, and that all its bits are self-references.
+ const BitTracker::BitValue &B0 = RC[0];
+ if (B0.Type != BitTracker::BitValue::Ref)
+ return false;
+
+ unsigned SrcR = B0.RefI.Reg;
+ unsigned SrcSR = 0;
+ unsigned Pos = B0.RefI.Pos;
+
+ // All the non-zero bits should be consecutive bits from the same register.
+ for (unsigned i = 1; i < W-Z; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (V.Type != BitTracker::BitValue::Ref)
+ return false;
+ if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
+ return false;
+ }
+
+ // Now, find the other bitfield among AVs.
+ for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
+ // The number of leading zeros here should be the number of trailing
+ // non-zeros in RC.
+ if (!BT.has(S))
+ continue;
+ const BitTracker::RegisterCell &SC = BT.lookup(S);
+ if (SC.width() != W || ctlz(SC) != W-Z)
+ continue;
+ // The Z lower bits should now match SrcR.
+ const BitTracker::BitValue &S0 = SC[0];
+ if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
+ continue;
+ unsigned P = S0.RefI.Pos;
+
+ if (Pos <= P && (Pos + W-Z) != P)
+ continue;
+ if (P < Pos && (P + Z) != Pos)
+ continue;
+ // The starting bitfield position must be at a subregister boundary.
+ if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
+ continue;
+
+ unsigned I;
+ for (I = 1; I < Z; ++I) {
+ const BitTracker::BitValue &V = SC[I];
+ if (V.Type != BitTracker::BitValue::Ref)
+ break;
+ if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
+ break;
+ }
+ if (I != Z)
+ continue;
+
+ // Generate bitsplit where S is defined.
+ CountBitSplit++;
+ MachineInstr *DefS = MRI.getVRegDef(S);
+ assert(DefS != nullptr);
+ DebugLoc DL = DefS->getDebugLoc();
+ MachineBasicBlock &B = *DefS->getParent();
+ auto At = DefS->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(DefS);
+ if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
+ SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
+ if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
+ continue;
+ unsigned ImmOp = Pos <= P ? W-Z : Z;
+
+    // Look for a previously created bitsplit instruction that can be reused.
+ unsigned NewR = 0;
+ for (MachineInstr *In : NewMIs) {
+ if (In->getOpcode() != Hexagon::A4_bitspliti)
+ continue;
+ MachineOperand &Op1 = In->getOperand(1);
+ if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
+ continue;
+ if (In->getOperand(2).getImm() != ImmOp)
+ continue;
+ // Check if the target register is available here.
+ MachineOperand &Op0 = In->getOperand(0);
+ MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
+ assert(DefI != nullptr);
+ if (!MDT.dominates(DefI, &*At))
+ continue;
+
+ // Found one that can be reused.
+ assert(Op0.getSubReg() == 0);
+ NewR = Op0.getReg();
+ break;
+ }
+ if (!NewR) {
+ NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
+ .addReg(SrcR, 0, SrcSR)
+ .addImm(ImmOp);
+ NewMIs.push_back(NewBS);
+ }
+ if (Pos <= P) {
+ HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
+ HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
+ } else {
+ HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
+ HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
+ }
+ return true;
+ }
+
+ return false;
+}
+
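A minimal standalone sketch of the identity genBitSplit relies on, assuming A4_bitspliti with immediate n places the low n bits of the source in the low word of the destination pair and the remaining bits in the high word (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Src = 0x12345678;
  unsigned N = 12;                      // width of the lower bitfield
  uint32_t Lo = Src & ((1u << N) - 1);  // what the low word would hold
  uint32_t Hi = Src >> N;               // what the high word would hold
  // The two slices are disjoint and recombine to the original value, so two
  // virtual registers holding them can be rewritten as subregisters of one
  // register pair produced by a single split.
  assert(((Hi << N) | Lo) == Src);
  return 0;
}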
// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
// vreg2 = S2_lsr_i_r vreg1, 5
@@ -2210,6 +2369,201 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
return false;
}
+// Detect whether RD is a bitfield extract (sign- or zero-extended) of
+// some register from the AVs set. Create a new corresponding instruction
+// at the location of MI. The intent is to recognize situations where
+// a sequence of instructions performs an operation that is equivalent to
+// an extract operation, such as a shift left followed by a shift right.
+bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
+ const RegisterSet &AVs) {
+ if (!GenExtract)
+ return false;
+ if (CountExtract >= MaxExtract)
+ return false;
+ CountExtract++;
+
+ unsigned W = RC.width();
+ unsigned RW = W;
+ unsigned Len;
+ bool Signed;
+
+ // The code is mostly class-independent, except for the part that generates
+ // the extract instruction, and establishes the source register (in case it
+ // needs to use a subregister).
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
+ return false;
+ assert(RD.Sub == 0);
+
+ // Observation:
+ // If the cell has a form of 00..0xx..x with k zeros and n remaining
+ // bits, this could be an extractu of the n bits, but it could also be
+ // an extractu of a longer field which happens to have 0s in the top
+ // bit positions.
+ // The same logic applies to sign-extended fields.
+ //
+ // Do not check for the extended extracts, since it would expand the
+ // search space quite a bit. The search may be expensive as it is.
+
+ const BitTracker::BitValue &TopV = RC[W-1];
+
+ // Eliminate candidates that have self-referential bits, since they
+ // cannot be extracts from other registers. Also, skip registers that
+ // have compile-time constant values.
+ bool IsConst = true;
+ for (unsigned I = 0; I != W; ++I) {
+ const BitTracker::BitValue &V = RC[I];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
+ return false;
+ IsConst = IsConst && (V.is(0) || V.is(1));
+ }
+ if (IsConst)
+ return false;
+
+ if (TopV.is(0) || TopV.is(1)) {
+ bool S = TopV.is(1);
+ for (--W; W > 0 && RC[W-1].is(S); --W)
+ ;
+ Len = W;
+ Signed = S;
+ // The sign bit must be a part of the field being extended.
+ if (Signed)
+ ++Len;
+ } else {
+ // This could still be a sign-extended extract.
+ assert(TopV.Type == BitTracker::BitValue::Ref);
+ if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
+ return false;
+ for (--W; W > 0 && RC[W-1] == TopV; --W)
+ ;
+ // The top bits of RC are copies of TopV. One occurrence of TopV will
+ // be a part of the field.
+ Len = W + 1;
+ Signed = true;
+ }
+
+ // This would be just a copy. It should be handled elsewhere.
+ if (Len == RW)
+ return false;
+
+ DEBUG({
+ dbgs() << __func__ << " on reg: " << PrintReg(RD.Reg, &HRI, RD.Sub)
+ << ", MI: " << *MI;
+ dbgs() << "Cell: " << RC << '\n';
+ dbgs() << "Expected bitfield size: " << Len << " bits, "
+ << (Signed ? "sign" : "zero") << "-extended\n";
+ });
+
+ bool Changed = false;
+
+ for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
+ if (!BT.has(R))
+ continue;
+ const BitTracker::RegisterCell &SC = BT.lookup(R);
+ unsigned SW = SC.width();
+
+ // The source can be longer than the destination, as long as its size is
+ // a multiple of the size of the destination. Also, we would need to be
+ // able to refer to the subregister in the source that would be of the
+ // same size as the destination, but only check the sizes here.
+ if (SW < RW || (SW % RW) != 0)
+ continue;
+
+ // The field can start at any offset in SC as long as it contains Len
+ // bits and does not cross subregister boundary (if the source register
+ // is longer than the destination).
+ unsigned Off = 0;
+ while (Off <= SW-Len) {
+ unsigned OE = (Off+Len)/RW;
+ if (OE != Off/RW) {
+ // The assumption here is that if the source (R) is longer than the
+ // destination, then the destination is a sequence of words of
+ // size RW, and each such word in R can be accessed via a subregister.
+ //
+ // If the beginning and the end of the field cross the subregister
+ // boundary, advance to the next subregister.
+ Off = OE*RW;
+ continue;
+ }
+ if (HBS::isEqual(RC, 0, SC, Off, Len))
+ break;
+ ++Off;
+ }
+
+ if (Off > SW-Len)
+ continue;
+
+ // Found match.
+ unsigned ExtOpc = 0;
+ if (Off == 0) {
+ if (Len == 8)
+ ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
+ else if (Len == 16)
+ ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
+ else if (Len < 10 && !Signed)
+ ExtOpc = Hexagon::A2_andir;
+ }
+ if (ExtOpc == 0) {
+ ExtOpc =
+ Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
+ : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
+ }
+ unsigned SR = 0;
+ // This only recognizes isub_lo and isub_hi.
+ if (RW != SW && RW*2 != SW)
+ continue;
+ if (RW != SW)
+ SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
+ Off = Off % RW;
+
+ if (!validateReg({R,SR}, ExtOpc, 1))
+ continue;
+
+ // Don't generate the same instruction as the one being optimized.
+ if (MI->getOpcode() == ExtOpc) {
+ // All possible ExtOpc's have the source in operand(1).
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ if (SrcOp.getReg() == R)
+ continue;
+ }
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &B = *MI->getParent();
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
+ .addReg(R, 0, SR);
+ switch (ExtOpc) {
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxth:
+ break;
+ case Hexagon::A2_andir:
+ MIB.addImm((1u << Len) - 1);
+ break;
+ case Hexagon::S4_extract:
+ case Hexagon::S2_extractu:
+ case Hexagon::S4_extractp:
+ case Hexagon::S2_extractup:
+ MIB.addImm(Len)
+ .addImm(Off);
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ Changed = true;
+ break;
+ }
+
+ return Changed;
+}
+
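The shift-left-then-shift-right sequence mentioned in the comment above is equivalent to a plain unsigned extract; a small self-contained check of that equivalence for the zero-extended case (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// extractu(x, Len, Off): Len bits of x starting at bit Off, zero-extended.
static uint32_t extractu(uint32_t X, unsigned Len, unsigned Off) {
  return (X >> Off) & ((1u << Len) - 1);
}

int main() {
  uint32_t X = 0xCAFEF00D;
  unsigned Len = 9, Off = 5;
  // Shift left to drop the bits above the field, then shift right to drop
  // the bits below it: the same Len-bit field, zero-extended.
  uint32_t ViaShifts = (X << (32 - Off - Len)) >> (32 - Len);
  assert(ViaShifts == extractu(X, Len, Off));
  return 0;
}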
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
@@ -2247,12 +2601,15 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
bool T = genPackhl(MI, RD, RC);
+ T = T || simplifyExtractLow(MI, RD, RC, AVB);
Changed |= T;
continue;
}
if (FRC->getID() == Hexagon::IntRegsRegClassID) {
- bool T = genExtractHalf(MI, RD, RC);
+ bool T = genBitSplit(MI, RD, RC, AVB);
+ T = T || simplifyExtractLow(MI, RD, RC, AVB);
+ T = T || genExtractHalf(MI, RD, RC);
T = T || genCombineHalf(MI, RD, RC);
T = T || genExtractLow(MI, RD, RC);
Changed |= T;
@@ -2313,7 +2670,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
BT.run();
RegisterSet ABS; // Available registers for BS.
- BitSimplification BitS(BT, HII, HRI, MRI, MF);
+ BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
Changed |= visitBlock(Entry, BitS, ABS);
Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
@@ -2599,7 +2956,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
const MachineOperand &Op = SI->getOperand(j);
if (!Op.isReg()) {
- MIB.addOperand(Op);
+ MIB.add(Op);
continue;
}
if (!Op.isUse())
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 436f88dcd450..90ccecb6629a 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -74,7 +74,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
// Module::AnyPointerSize.
if (Width == 0 || Width > 64)
break;
- AttributeSet Attrs = F.getAttributes();
+ AttributeList Attrs = F.getAttributes();
if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal))
continue;
InPhysReg = getNextPhysReg(InPhysReg, Width);
@@ -272,6 +272,9 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
// cases below.
uint16_t W0 = (Reg[0].Reg != 0) ? getRegBitWidth(Reg[0]) : 0;
+ // Register id of the 0th operand. It can be 0.
+ unsigned Reg0 = Reg[0].Reg;
+
switch (Opc) {
// Transfer immediate:
@@ -792,6 +795,17 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case A2_zxth:
return rr0(eZXT(rc(1), 16), Outputs);
+ // Saturations
+
+ case A2_satb:
+ return rr0(eSXT(RegisterCell::self(0, W0).regify(Reg0), 8), Outputs);
+ case A2_sath:
+ return rr0(eSXT(RegisterCell::self(0, W0).regify(Reg0), 16), Outputs);
+ case A2_satub:
+ return rr0(eZXT(RegisterCell::self(0, W0).regify(Reg0), 8), Outputs);
+ case A2_satuh:
+ return rr0(eZXT(RegisterCell::self(0, W0).regify(Reg0), 16), Outputs);
+
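For context, the run-time behaviour the new A2_satb case approximates, assuming the usual saturate-to-signed-byte semantics; the BitTracker change above only models the result as a sign-extended 8-bit unknown, which is a safe over-approximation (illustrative only, not part of the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Clamp to the signed 8-bit range [-128, 127].
static int32_t satb(int32_t X) {
  return std::min<int32_t>(127, std::max<int32_t>(-128, X));
}

int main() {
  assert(satb(1000) == 127 && satb(-1000) == -128 && satb(5) == 5);
  return 0;
}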
// Bit count:
case S2_cl0:
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp
index adc213c3d438..1640b40c164f 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -219,8 +219,7 @@ HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()),
Reserved(TRI.getReservedRegs(mf)) {
// Consider all non-allocatable registers as reserved.
- for (auto I = TRI.regclass_begin(), E = TRI.regclass_end(); I != E; ++I) {
- auto *RC = *I;
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
if (RC->isAllocatable())
continue;
for (unsigned R : *RC)
@@ -233,14 +232,16 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns(
const TargetRegisterInfo &TRI) {
RegisterSet LiveIns;
RegisterSet Tmp;
+
for (auto I : B.liveins()) {
- if (I.LaneMask.all()) {
- Tmp.insert({I.PhysReg,0});
+ MCSubRegIndexIterator S(I.PhysReg, &TRI);
+ if (I.LaneMask.all() || (I.LaneMask.any() && !S.isValid())) {
+ Tmp.insert({I.PhysReg, 0});
continue;
}
- for (MCSubRegIndexIterator S(I.PhysReg, &TRI); S.isValid(); ++S) {
- LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
- if ((M & I.LaneMask).any())
+ for (; S.isValid(); ++S) {
+ unsigned SI = S.getSubRegIndex();
+ if ((I.LaneMask & TRI.getSubRegIndexLaneMask(SI)).any())
Tmp.insert({S.getSubReg(), 0});
}
}
@@ -307,6 +308,8 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
LastUse[R] = LastDef[R] = IndexType::None;
};
+ RegisterSet Defs, Clobbers;
+
for (auto &In : B) {
if (In.isDebugValue())
continue;
@@ -325,19 +328,67 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
closeRange(S);
}
}
- // Process defs.
+ // Process defs and clobbers.
+ Defs.clear();
+ Clobbers.clear();
for (auto &Op : In.operands()) {
if (!Op.isReg() || !Op.isDef() || Op.isUndef())
continue;
RegisterRef R = { Op.getReg(), Op.getSubReg() };
- if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
- continue;
for (auto S : expandToSubRegs(R, MRI, TRI)) {
- if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
- closeRange(S);
- LastDef[S] = Index;
+ if (TargetRegisterInfo::isPhysicalRegister(S.Reg) && Reserved[S.Reg])
+ continue;
+ if (Op.isDead())
+ Clobbers.insert(S);
+ else
+ Defs.insert(S);
+ }
+ }
+
+ for (auto &Op : In.operands()) {
+ if (!Op.isRegMask())
+ continue;
+ const uint32_t *BM = Op.getRegMask();
+ for (unsigned PR = 1, N = TRI.getNumRegs(); PR != N; ++PR) {
+ // Skip registers that have subregisters. A register is preserved
+ // iff its bit is set in the regmask, so if R1:0 was preserved, both
+ // R1 and R0 would also be present.
+ if (MCSubRegIterator(PR, &TRI, false).isValid())
+ continue;
+ if (Reserved[PR])
+ continue;
+ if (BM[PR/32] & (1u << (PR%32)))
+ continue;
+ RegisterRef R = { PR, 0 };
+ if (!Defs.count(R))
+ Clobbers.insert(R);
}
}
+ // Defs and clobbers can overlap, e.g.
+ // %D0<def,dead> = COPY %vreg5, %R0<imp-def>, %R1<imp-def>
+ for (RegisterRef R : Defs)
+ Clobbers.erase(R);
+
+ // Update maps for defs.
+ for (RegisterRef S : Defs) {
+ // Defs should already be expanded into subregs.
+ assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) ||
+ !MCSubRegIterator(S.Reg, &TRI, false).isValid());
+ if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
+ closeRange(S);
+ LastDef[S] = Index;
+ }
+ // Update maps for clobbers.
+ for (RegisterRef S : Clobbers) {
+ // Clobbers should already be expanded into subregs.
+ assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) ||
+ !MCSubRegIterator(S.Reg, &TRI, false).isValid());
+ if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
+ closeRange(S);
+ // Create a single-instruction range.
+ LastDef[S] = LastUse[S] = Index;
+ closeRange(S);
+ }
}
// Collect live-on-exit.
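The regmask test used in the clobber loop above, written out on its own: a register's bit is set in the mask when it is preserved across the call, so a clear bit means the register is clobbered (illustrative only, not part of the patch):

#include <cstdint>

// Bit PR of the call's register mask is set when physical register number PR
// is preserved across the call; a clear bit means the register is clobbered.
static bool isPreserved(const uint32_t *RegMask, unsigned PR) {
  return (RegMask[PR / 32] & (1u << (PR % 32))) != 0;
}

int main() {
  const uint32_t Mask[2] = {0x0000FFFFu, 0x0u}; // registers 0..15 preserved
  return (isPreserved(Mask, 3) && !isPreserved(Mask, 40)) ? 0 : 1;
}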
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
deleted file mode 100644
index e61b2a7a58ac..000000000000
--- a/lib/Target/Hexagon/HexagonCallingConv.td
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the Hexagon architectures.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
-//===----------------------------------------------------------------------===//
-
-// Hexagon 32-bit C return-value convention.
-def RetCC_Hexagon32 : CallingConv<[
- CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
- CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>,
-
- // Alternatively, they are assigned to the stack in 4-byte aligned units.
- CCAssignToStack<4, 4>
-]>;
-
-// Hexagon 32-bit C Calling convention.
-def CC_Hexagon32 : CallingConv<[
- // All arguments get passed in integer registers if there is space.
- CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
- CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>,
-
- // Alternatively, they are assigned to the stack in 4-byte aligned units.
- CCAssignToStack<4, 4>
-]>;
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 489da6be923d..a07ba77e6f3e 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -315,11 +315,8 @@ void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root,
// visited".
Order.push_back(Root);
- DomTreeNode *DTN = DT->getNode(Root);
- typedef GraphTraits<DomTreeNode*> GTN;
- typedef GTN::ChildIteratorType Iter;
- for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
- getBlockTraversalOrder((*I)->getBlock(), Order);
+ for (auto *DTN : children<DomTreeNode*>(DT->getNode(Root)))
+ getBlockTraversalOrder(DTN->getBlock(), Order);
}
bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
@@ -1235,11 +1232,8 @@ void HexagonCommonGEP::removeDeadCode() {
for (unsigned i = 0; i < BO.size(); ++i) {
BasicBlock *B = cast<BasicBlock>(BO[i]);
- DomTreeNode *N = DT->getNode(B);
- typedef GraphTraits<DomTreeNode*> GTN;
- typedef GTN::ChildIteratorType Iter;
- for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
- BO.push_back((*I)->getBlock());
+ for (auto DTN : children<DomTreeNode*>(DT->getNode(B)))
+ BO.push_back(DTN->getBlock());
}
for (unsigned i = BO.size(); i > 0; --i) {
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 36080997ec6b..8118c8eb149d 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -440,22 +440,28 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
// Put instructions that last defined integer or double registers into the
// map.
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !Op.isDef() || !Op.getReg())
- continue;
- unsigned Reg = Op.getReg();
- if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- LastDef[*SubRegs] = &MI;
- }
- } else if (Hexagon::IntRegsRegClass.contains(Reg))
- LastDef[Reg] = &MI;
+ for (MachineOperand &Op : MI.operands()) {
+ if (Op.isReg()) {
+ if (!Op.isDef() || !Op.getReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LastDef[*SubRegs] = &MI;
+ } else if (Hexagon::IntRegsRegClass.contains(Reg))
+ LastDef[Reg] = &MI;
+ } else if (Op.isRegMask()) {
+ for (unsigned Reg : Hexagon::IntRegsRegClass)
+ if (Op.clobbersPhysReg(Reg))
+ LastDef[Reg] = &MI;
+ }
}
}
}
bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
if (IsCombinesDisabled) return false;
diff --git a/lib/Target/Hexagon/HexagonDepArch.h b/lib/Target/Hexagon/HexagonDepArch.h
new file mode 100644
index 000000000000..1009aa39cefb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepArch.h
@@ -0,0 +1,10 @@
+//===--- HexagonDepArch.h -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+enum HexagonArchEnum { V4,V5,V55,V60,V62 };
diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td
new file mode 100644
index 000000000000..5b1d02c136f0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepArch.td
@@ -0,0 +1,19 @@
+//===--- HexagonDepArch.td ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "V62", "Enable Hexagon V62 architecture">;
+def HasV62T : Predicate<"HST->hasV62TOps()">, AssemblerPredicate<"ArchV62">;
+def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Enable Hexagon V60 architecture">;
+def HasV60T : Predicate<"HST->hasV60TOps()">, AssemblerPredicate<"ArchV60">;
+def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Enable Hexagon V55 architecture">;
+def HasV55T : Predicate<"HST->hasV55TOps()">, AssemblerPredicate<"ArchV55">;
+def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Enable Hexagon V4 architecture">;
+def HasV4T : Predicate<"HST->hasV4TOps()">, AssemblerPredicate<"ArchV4">;
+def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Enable Hexagon V5 architecture">;
+def HasV5T : Predicate<"HST->hasV5TOps()">, AssemblerPredicate<"ArchV5">;
diff --git a/lib/Target/Hexagon/HexagonDepDecoders.h b/lib/Target/Hexagon/HexagonDepDecoders.h
new file mode 100644
index 000000000000..aa9787ecf0c8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepDecoders.h
@@ -0,0 +1,64 @@
+//===--- HexagonDepDecoders.h ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<4>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<14>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<8>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<7>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<12>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<3>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<13>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<9>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<5>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
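Each of these wrappers forwards to signedDecoder<N>; assuming that template performs the usual two's-complement sign extension of the low N encoded bits, the core operation is the classic xor-and-subtract trick (illustrative only, not part of the patch):

#include <cstdint>

// Sign-extend the low N bits of Field using the xor-and-subtract identity.
template <unsigned N> int64_t signExtend(uint64_t Field) {
  const uint64_t SignBit = 1ull << (N - 1);
  uint64_t Low = Field & ((SignBit << 1) - 1); // keep only the low N bits
  return int64_t(Low ^ SignBit) - int64_t(SignBit);
}

int main() {
  return (signExtend<4>(0xF) == -1 && signExtend<4>(0x7) == 7) ? 0 : 1;
}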
diff --git a/lib/Target/Hexagon/HexagonDepITypes.h b/lib/Target/Hexagon/HexagonDepITypes.h
new file mode 100644
index 000000000000..f8ae39a37994
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepITypes.h
@@ -0,0 +1,53 @@
+//===--- HexagonDepITypes.h -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace HexagonII {
+enum Type {
+ TypeALU32_2op = 0,
+ TypeALU32_3op = 1,
+ TypeALU32_ADDI = 2,
+ TypeALU64 = 3,
+ TypeCJ = 4,
+ TypeCOPROC_VMEM = 5,
+ TypeCR = 7,
+ TypeCVI_HIST = 10,
+ TypeCVI_VA = 16,
+ TypeCVI_VA_DV = 17,
+ TypeCVI_VINLANESAT = 18,
+ TypeCVI_VM_CUR_LD = 19,
+ TypeCVI_VM_LD = 20,
+ TypeCVI_VM_NEW_ST = 21,
+ TypeCVI_VM_ST = 22,
+ TypeCVI_VM_STU = 23,
+ TypeCVI_VM_TMP_LD = 24,
+ TypeCVI_VM_VP_LDU = 25,
+ TypeCVI_VP = 26,
+ TypeCVI_VP_VS = 27,
+ TypeCVI_VS = 28,
+ TypeCVI_VX = 30,
+ TypeCVI_VX_DV = 31,
+ TypeDUPLEX = 32,
+ TypeENDLOOP = 33,
+ TypeEXTENDER = 34,
+ TypeJ = 35,
+ TypeLD = 36,
+ TypeM = 37,
+ TypeMAPPING = 38,
+ TypeNCJ = 39,
+ TypePSEUDO = 40,
+ TypeST = 41,
+ TypeSUBINSN = 42,
+ TypeS_2op = 43,
+ TypeS_3op = 44,
+ TypeV2LDST = 47,
+ TypeV4LDST = 48
+};
+}
+}
diff --git a/lib/Target/Hexagon/HexagonDepITypes.td b/lib/Target/Hexagon/HexagonDepITypes.td
new file mode 100644
index 000000000000..f1d689ce12f4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepITypes.td
@@ -0,0 +1,48 @@
+//===--- HexagonDepITypes.td ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class IType<bits<6> t> { bits<6> Value = t; }
+def TypeALU32_2op : IType<0>;
+def TypeALU32_3op : IType<1>;
+def TypeALU32_ADDI : IType<2>;
+def TypeALU64 : IType<3>;
+def TypeCJ : IType<4>;
+def TypeCOPROC_VMEM : IType<5>;
+def TypeCR : IType<7>;
+def TypeCVI_HIST : IType<10>;
+def TypeCVI_VA : IType<16>;
+def TypeCVI_VA_DV : IType<17>;
+def TypeCVI_VINLANESAT : IType<18>;
+def TypeCVI_VM_CUR_LD : IType<19>;
+def TypeCVI_VM_LD : IType<20>;
+def TypeCVI_VM_NEW_ST : IType<21>;
+def TypeCVI_VM_ST : IType<22>;
+def TypeCVI_VM_STU : IType<23>;
+def TypeCVI_VM_TMP_LD : IType<24>;
+def TypeCVI_VM_VP_LDU : IType<25>;
+def TypeCVI_VP : IType<26>;
+def TypeCVI_VP_VS : IType<27>;
+def TypeCVI_VS : IType<28>;
+def TypeCVI_VX : IType<30>;
+def TypeCVI_VX_DV : IType<31>;
+def TypeDUPLEX : IType<32>;
+def TypeENDLOOP : IType<33>;
+def TypeEXTENDER : IType<34>;
+def TypeJ : IType<35>;
+def TypeLD : IType<36>;
+def TypeM : IType<37>;
+def TypeMAPPING : IType<38>;
+def TypeNCJ : IType<39>;
+def TypePSEUDO : IType<40>;
+def TypeST : IType<41>;
+def TypeSUBINSN : IType<42>;
+def TypeS_2op : IType<43>;
+def TypeS_3op : IType<44>;
+def TypeV2LDST : IType<47>;
+def TypeV4LDST : IType<48>;
diff --git a/lib/Target/Hexagon/HexagonDepInstrFormats.td b/lib/Target/Hexagon/HexagonDepInstrFormats.td
new file mode 100644
index 000000000000..d7a99f48803b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -0,0 +1,4182 @@
+//===--- HexagonDepInstrFormats.td ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class Enc_12122225 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <3> Qd8;
+ let Inst{2-0} = Qd8{2-0};
+}
+class Enc_16626097 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_13397056 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7315939 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{24-22} = n1{4-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_15275738 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12822813 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_10282127 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_14264243 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{11-8} = Rt16{3-0};
+}
+class Enc_6778937 : OpcodeHexagon {
+ bits <5> Rxx32;
+ let Inst{20-16} = Rxx32{4-0};
+ bits <0> sgp10;
+}
+class Enc_5480539 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_11422009 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_16357011 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{8-4} = Vv32{4-0};
+ bits <5> Vt32;
+ let Inst{13-9} = Vt32{4-0};
+ bits <4> Vdd16;
+ let Inst{3-0} = Vdd16{3-0};
+}
+class Enc_4975051 : OpcodeHexagon {
+ bits <19> Ii;
+ let Inst{26-25} = Ii{18-17};
+ let Inst{20-16} = Ii{16-12};
+ let Inst{13-5} = Ii{11-3};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_14786238 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_15472748 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_6773159 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_12535811 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_14007201 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <8> II;
+ let Inst{22-16} = II{7-1};
+ let Inst{13-13} = II{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_2577026 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{2-0} = Qt8{2-0};
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_7305764 : OpcodeHexagon {
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+}
+class Enc_11682941 : OpcodeHexagon {
+ bits <19> Ii;
+ let Inst{26-25} = Ii{18-17};
+ let Inst{20-16} = Ii{16-12};
+ let Inst{13-13} = Ii{11-11};
+ let Inst{7-0} = Ii{10-3};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_16376009 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13249928 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_1971351 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13715847 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_13303422 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14574598 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_13094118 : OpcodeHexagon {
+ bits <5> Css32;
+ let Inst{20-16} = Css32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4231995 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_844699 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-22} = n1{2-0};
+}
+class Enc_8752140 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7978128 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+}
+class Enc_10492541 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_0 : OpcodeHexagon {
+}
+class Enc_15733946 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_738356 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_14400220 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{9-5} = Ii{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_15194851 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_14172170 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_10065510 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14998517 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <3> n1;
+ let Inst{29-29} = n1{2-2};
+ let Inst{26-25} = n1{1-0};
+}
+class Enc_16657398 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_14620934 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_10075393 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_8638014 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13261538 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_8990840 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_5974204 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_4711514 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_11492529 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9277990 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_6690615 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-4} = Ii{6-2};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_1220199 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_7785569 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-22} = n1{4-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_2880796 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_6858527 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{4-0} = Vv32{4-0};
+}
+class Enc_11863656 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_151014 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <2> Px4;
+ let Inst{6-5} = Px4{1-0};
+}
+class Enc_10333841 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_14044877 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-3} = Ii{4-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_13691337 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <2> Qx4;
+ let Inst{6-5} = Qx4{1-0};
+}
+class Enc_3817033 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <3> Qt8;
+ let Inst{10-8} = Qt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_3540372 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_5200852 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_15949334 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_3831744 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_8280533 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_10969213 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_3974695 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{10-4} = Ii{6-0};
+ bits <4> Rx16;
+ let Inst{3-0} = Rx16{3-0};
+}
+class Enc_7255914 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7212930 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12781442 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_799555 : OpcodeHexagon {
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_11083408 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_900013 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_9487067 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{19-16} = Ii{11-8};
+ let Inst{12-5} = Ii{7-0};
+ bits <2> Pu4;
+ let Inst{22-21} = Pu4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16014536 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_12419313 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-23} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_5503430 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_14767681 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_9093094 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <8> II;
+ let Inst{22-16} = II{7-1};
+ let Inst{13-13} = II{0-0};
+ bits <2> Pu4;
+ let Inst{24-23} = Pu4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11542684 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{27-21} = Ii{15-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8877260 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_1737833 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-3} = Ii{4-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_255516 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_10721363 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_7076358 : OpcodeHexagon {
+ bits <5> Zdd8;
+ let Inst{4-0} = Zdd8{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11930928 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2410156 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_6735062 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_7965855 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_5202340 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vyy32;
+ let Inst{4-0} = Vyy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10568534 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <2> Pu4;
+ let Inst{22-21} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16730127 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11224149 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{13-13} = Ii{7-7};
+ let Inst{7-3} = Ii{6-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_9772987 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{4-0} = Rtt32{4-0};
+}
+class Enc_9238139 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Zdd8;
+ let Inst{4-0} = Zdd8{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2082775 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_5790679 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{12-8} = Ii{8-4};
+ let Inst{4-3} = Ii{3-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_9305257 : OpcodeHexagon {
+ bits <5> Zu8;
+ let Inst{12-8} = Zu8{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_3735566 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12654528 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{4-0} = Vvv32{4-0};
+}
+class Enc_15290236 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_11139981 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_15546666 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{10-8} = Ii{8-6};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_486163 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2079016 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{1-0} = Ii{1-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_10095813 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_13133322 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_9422954 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_10642833 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14989332 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{4-0} = Vv32{4-0};
+}
+class Enc_10263630 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_13937564 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_7171569 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_2702036 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_1928953 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_5853469 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_7692963 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_15140689 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_748676 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_3372766 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7900405 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11930027 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+}
+class Enc_971574 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <6> II;
+ let Inst{23-23} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_13453446 : OpcodeHexagon {
+ bits <24> Ii;
+ let Inst{24-16} = Ii{23-15};
+ let Inst{13-1} = Ii{14-2};
+}
+class Enc_6356866 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_16246706 : OpcodeHexagon {
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_5326450 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11687333 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_2771456 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11282123 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_518319 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16104442 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_7912540 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_15560488 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7581852 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_10030031 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_3915770 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4075554 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11326438 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4050532 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_14461004 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13344657 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13114546 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_14530015 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-23} = n1{4-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_5967898 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_15450971 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-22} = n1{4-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_15536400 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{3-0} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_1291652 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{8-8} = Ii{0-0};
+}
+class Enc_5636753 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+}
+class Enc_5757366 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_9752128 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13618890 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_5890213 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_5582416 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_13536408 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{3-0} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_9773189 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rxx32;
+ let Inst{12-8} = Rxx32{4-0};
+}
+class Enc_2152247 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_12848507 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_16279406 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_1734121 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{10-8} = Ii{3-1};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_766909 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_4527648 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_8849208 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_9894557 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <6> II;
+ let Inst{23-21} = II{5-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4109168 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+}
+class Enc_14560494 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9773167 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_1898420 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+}
+class Enc_11498120 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_15459921 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10058269 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_10197700 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_12608570 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4804090 : OpcodeHexagon {
+ bits <6> Ss64;
+ let Inst{21-16} = Ss64{5-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14973146 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_5718302 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_2103742 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_7564330 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_2176383 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{9-4} = Ii{5-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_7736768 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_13189194 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_5154851 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_1329520 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Cdd32;
+ let Inst{4-0} = Cdd32{4-0};
+}
+class Enc_14057553 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9223889 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_10979813 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{13-13} = Ii{6-6};
+ let Inst{7-3} = Ii{5-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_13490067 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{2-0} = Qt8{2-0};
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_10076500 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_163381 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-5} = Ii{11-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_10328975 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_14939491 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_8891794 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_7723767 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_2639299 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{11-8} = Rd16{3-0};
+}
+class Enc_11552785 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11849200 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_14868535 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{23-22} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+}
+class Enc_48594 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6608821 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_11049656 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-3} = Ii{7-3};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_117962 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{23-21} = Ii{7-5};
+ let Inst{13-13} = Ii{4-4};
+ let Inst{7-5} = Ii{3-1};
+ let Inst{3-3} = Ii{0-0};
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_5900401 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_36641 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_9626139 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11971407 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_9852473 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_6495334 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_1186018 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_15999208 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_11477246 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_7971062 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{23-22} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4327792 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_10326434 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1572239 : OpcodeHexagon {
+ bits <2> Qt4;
+ let Inst{6-5} = Qt4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_6372758 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_15793331 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_11424254 : OpcodeHexagon {
+ bits <2> Qt4;
+ let Inst{6-5} = Qt4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_4983213 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{10-0} = Ii{13-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_16035138 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+}
+class Enc_8225953 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{13-13} = Ii{7-7};
+ let Inst{7-3} = Ii{6-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_4397470 : OpcodeHexagon {
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+}
+class Enc_1004392 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_16319737 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-13} = Ii{11-11};
+ let Inst{7-0} = Ii{10-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_2296022 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9664427 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <3> Qss8;
+ let Inst{2-0} = Qss8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_877823 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_1589406 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6900405 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14150875 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-22} = n1{3-0};
+}
+class Enc_15707793 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Gd32;
+ let Inst{4-0} = Gd32{4-0};
+}
+class Enc_14689096 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_9915754 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7470998 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qx4;
+ let Inst{1-0} = Qx4{1-0};
+}
+class Enc_11471622 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_14363183 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_15816255 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_5321335 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <4> Vdd16;
+ let Inst{7-4} = Vdd16{3-0};
+}
+class Enc_12702821 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_449439 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+}
+class Enc_2054304 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <6> Sd64;
+ let Inst{5-0} = Sd64{5-0};
+}
+class Enc_236434 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_5598813 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8409782 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_15182416 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4501395 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6039436 : OpcodeHexagon {
+ bits <3> Qtt8;
+ let Inst{2-0} = Qtt8{2-0};
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_476163 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_11281763 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9929262 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+}
+class Enc_13174858 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8437395 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_16578332 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{10-8} = Ii{8-6};
+ bits <5> Zdd8;
+ let Inst{4-0} = Zdd8{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12829314 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+}
+class Enc_9744403 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{13-9} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{8-4} = Vv32{4-0};
+ bits <4> Vdd16;
+ let Inst{3-0} = Vdd16{3-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10968391 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <7> n1;
+ let Inst{28-28} = n1{6-6};
+ let Inst{25-22} = n1{5-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_64199 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-4} = Ii{6-2};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_11039423 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6730375 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+}
+class Enc_16213761 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_13204995 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_13338314 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_9920336 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{4-0} = Rtt32{4-0};
+}
+class Enc_15380240 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_3296020 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2428539 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-23} = n1{2-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_10039393 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9372046 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_2901241 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_16145290 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{6-5} = Ps4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_13783220 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_12261611 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6135183 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rx16;
+ let Inst{3-0} = Rx16{3-0};
+}
+class Enc_5523416 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13472494 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16303398 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_3494181 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13983714 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_931653 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7622936 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_8773155 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_5401217 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <3> n1;
+ let Inst{28-28} = n1{2-2};
+ let Inst{24-23} = n1{1-0};
+}
+class Enc_6736678 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_3457570 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_3813442 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_3135259 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_5486172 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <3> Nt8;
+ let Inst{2-0} = Nt8{2-0};
+}
+class Enc_11081334 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
+}
+class Enc_9470751 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_2683366 : OpcodeHexagon {
+ bits <3> Quu8;
+ let Inst{10-8} = Quu8{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
+}
+class Enc_15830826 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{10-0} = Ii{13-3};
+}
+class Enc_4967902 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_14287645 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8324216 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_913538 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_16311032 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_9864697 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <6> II;
+ let Inst{20-16} = II{5-1};
+ let Inst{13-13} = II{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11205051 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{11-8} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_5611087 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10915758 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8943121 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_1539665 : OpcodeHexagon {
+ bits <5> Cs32;
+ let Inst{20-16} = Cs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8479583 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{23-23} = n1{1-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_313333 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_11544269 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_9018141 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Cd32;
+ let Inst{4-0} = Cd32{4-0};
+}
+class Enc_6152036 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Gdd32;
+ let Inst{4-0} = Gdd32{4-0};
+}
+class Enc_1954437 : OpcodeHexagon {
+ bits <6> Sss64;
+ let Inst{21-16} = Sss64{5-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_3742184 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_1835415 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{10-5} = Ii{6-1};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_1085466 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_13150110 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_6772177 : OpcodeHexagon {
+ bits <5> Zu8;
+ let Inst{12-8} = Zu8{4-0};
+ bits <5> Zd8;
+ let Inst{4-0} = Zd8{4-0};
+}
+class Enc_6616512 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_1886960 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2835415 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{10-5} = Ii{7-2};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14024197 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_12297800 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_7254313 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_677558 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{10-5} = Ii{8-3};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_6223403 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_674613 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_16479122 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{7-3} = Ii{7-3};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_11704059 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_9165078 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{8-3} = Ii{8-3};
+ bits <3> Rtt8;
+ let Inst{2-0} = Rtt8{2-0};
+}
+class Enc_15376009 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8838398 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{21-21} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <6> II;
+ let Inst{13-8} = II{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_2328527 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_1451363 : OpcodeHexagon {
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_4030179 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_13770697 : OpcodeHexagon {
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ry32;
+ let Inst{12-8} = Ry32{4-0};
+}
+class Enc_12212978 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12665927 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2082956 : OpcodeHexagon {
+ bits <32> Ii;
+ let Inst{27-16} = Ii{31-20};
+ let Inst{13-0} = Ii{19-6};
+}
+class Enc_220949 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-23} = n1{3-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_9939385 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{12-8} = Ii{8-4};
+ let Inst{4-3} = Ii{3-2};
+ bits <10> II;
+ let Inst{20-16} = II{9-5};
+ let Inst{7-5} = II{4-2};
+ let Inst{1-0} = II{1-0};
+}
+class Enc_2117024 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8390029 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_10989558 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_5972412 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_12851489 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9554661 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4202401 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6091631 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{9-8} = Qs4{1-0};
+ bits <2> Qt4;
+ let Inst{23-22} = Qt4{1-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_10157519 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_4835423 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{10-5} = Ii{5-0};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14046916 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_2921694 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_8732960 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+}
+class Enc_5338033 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-22} = n1{3-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_6956613 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2153798 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_16210172 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{10-8} = Qt8{2-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_5023792 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_1244745 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_10002182 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_12492533 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1774350 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_2703240 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_6975103 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_9789480 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_12244921 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8674673 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{23-22} = n1{1-0};
+}
+class Enc_8514936 : OpcodeHexagon {
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13455308 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_10188026 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_3158657 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10597934 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+ bits <2> n1;
+ let Inst{9-8} = n1{1-0};
+}
+class Enc_10612292 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qx4;
+ let Inst{1-0} = Qx4{1-0};
+}
+class Enc_5178985 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_3967902 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_2462143 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_9849208 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_12618352 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_7303598 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+}
+class Enc_13823098 : OpcodeHexagon {
+ bits <5> Gss32;
+ let Inst{20-16} = Gss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_16388420 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_8328140 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1793896 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4944558 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_13211717 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{20-16} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_8170340 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <3> Qdd8;
+ let Inst{2-0} = Qdd8{2-0};
+}
+class Enc_14071773 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8605375 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12711252 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{9-8} = Pv4{1-0};
+}
+class Enc_8202458 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_8577055 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-23} = n1{3-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_1409050 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_7466005 : OpcodeHexagon {
+ bits <5> Gs32;
+ let Inst{20-16} = Gs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2380082 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_10067774 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11000933 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <3> Nt8;
+ let Inst{2-0} = Nt8{2-0};
+}
+class Enc_13201267 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_1989309 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{4-0} = Vvv32{4-0};
+}
+class Enc_9082775 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8065534 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4631106 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Pu4;
+ let Inst{7-6} = Pu4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_11065510 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6673186 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_8498433 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4395009 : OpcodeHexagon {
+ bits <7> Ii;
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10926598 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_7606379 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_8131399 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_11522288 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_114098 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_5654851 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_12023037 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{6-5} = Ps4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_176263 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{9-4} = Ii{7-2};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_6130414 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{23-22} = Ii{15-14};
+ let Inst{13-0} = Ii{13-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_631197 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <6> II;
+ let Inst{23-21} = II{5-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_16214129 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_8333157 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4834775 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{13-8} = II{5-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rd16;
+ let Inst{19-16} = Rd16{3-0};
+}
+class Enc_16601956 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_15946706 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{6-5} = Ii{1-0};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_6923828 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_1332717 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_1786883 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <6> Sdd64;
+ let Inst{5-0} = Sdd64{5-0};
+}
+class Enc_14303394 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9282127 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_2813446 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_364753 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{23-23} = n1{0-0};
+}
+class Enc_12477789 : OpcodeHexagon {
+ bits <15> Ii;
+ let Inst{21-21} = Ii{14-14};
+ let Inst{13-13} = Ii{13-13};
+ let Inst{11-1} = Ii{12-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_44555 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_8497723 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_4359901 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{22-22} = n1{0-0};
+}
+class Enc_11271630 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10501894 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_9768377 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_16268019 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_8814718 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_6212930 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_5462762 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_6154421 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{13-13} = Ii{6-6};
+ let Inst{7-3} = Ii{5-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_8940892 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_3531000 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{11-5} = Ii{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_14311138 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_2216485 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12395768 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_11047413 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_1256611 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_7884306 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{8-4} = Ii{7-3};
+}
+class Enc_11244923 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8612939 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{22-22} = n1{1-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_16355964 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12616482 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_5915771 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-22} = n1{3-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_14459927 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7504828 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14209223 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_3931661 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13606251 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{11-8} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_11475992 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_13133231 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_9959498 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{22-21} = Ii{7-6};
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-5} = Ii{4-2};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_8919369 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-23} = n1{3-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_2968094 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{11-5} = Ii{6-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_4813442 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4684887 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{25-23} = n1{2-0};
+}
+class Enc_15606259 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_2268028 : OpcodeHexagon {
+ bits <3> Qtt8;
+ let Inst{10-8} = Qtt8{2-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
+}
+class Enc_13430430 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <3> Qxx8;
+ let Inst{2-0} = Qxx8{2-0};
+}
+class Enc_13336212 : OpcodeHexagon {
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+ bits <1> n1;
+ let Inst{9-9} = n1{0-0};
+}
+class Enc_15008287 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_4897205 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{9-8} = Qs4{1-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_8038806 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12669374 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_971347 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1997594 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11940513 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_2735552 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_16410950 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6226085 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14193700 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_15763937 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <6> n1;
+ let Inst{29-29} = n1{5-5};
+ let Inst{26-25} = n1{4-3};
+ let Inst{23-22} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_2492727 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_13425035 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4135257 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{10-8} = Ii{3-1};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_14631806 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_12397062 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11959851 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td
new file mode 100644
index 000000000000..2bfde9acaea9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -0,0 +1,45573 @@
+//===--- HexagonDepInstrInfo.td -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def A2_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = abs($Rs32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_absp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = abs($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000100;
+}
+def A2_abssat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = abs($Rs32):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_add : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+let isCommutable = 1;
+let isPredicable = 1;
+}
+def A2_addh_h16_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_sat_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_h16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_h16_sat_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_h16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_l16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_l16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_l16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_l16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = add($Rs32,#$Ii)",
+ALU32_ADDI_tc_1_SLOT0123, TypeALU32_ADDI>, Enc_11542684, PredNewRel, ImmRegRel {
+let Inst{31-28} = 0b1011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isPredicable = 1;
+let isAdd = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def A2_addp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let isCommutable = 1;
+let isAdd = 1;
+}
+def A2_addpsat : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+let Defs = [USR_OVF];
+let isCommutable = 1;
+}
+def A2_addsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add($Rs32,$Rt32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_addsp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rs32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64> {
+let isPseudo = 1;
+}
+def A2_addsph : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32):raw:hi",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_addspl : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32):raw:lo",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_and : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_and";
+let InputType = "reg";
+let BaseOpcode = "A2_and";
+let isCommutable = 1;
+let isPredicable = 1;
+}
+def A2_andir : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = and($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, ImmRegRel {
+let Inst{31-22} = 0b0111011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_and";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def A2_andp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = and($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let isCommutable = 1;
+}
+def A2_aslh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_aslh";
+let isPredicable = 1;
+}
+def A2_asrh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_asrh";
+let isPredicable = 1;
+}
+def A2_combine_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.h,$Rs32.h)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combine_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.h,$Rs32.l)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combine_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.l,$Rs32.h)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combine_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.l,$Rs32.l)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combineii : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s32_0Imm:$Ii, s8_0Imm:$II),
+"$Rdd32 = combine(#$Ii,#$II)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_14007201 {
+let Inst{31-23} = 0b011111000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_combinew : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1997594, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110101000;
+let InputType = "reg";
+let BaseOpcode = "A2_combinew";
+let isPredicable = 1;
+}
+def A2_max : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = max($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_maxp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = max($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_maxu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = maxu($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_maxup : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = maxu($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_min : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = min($Rt32,$Rs32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_minp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = min($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_minu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = minu($Rt32,$Rs32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_minup : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = minu($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_neg : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = neg($Rs32)",
+PSEUDO, TypeALU32_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_negp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = neg($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000100;
+}
+def A2_negsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = neg($Rs32):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_nop : HInst<
+(outs),
+(ins),
+"nop",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b0111111100000000;
+}
+def A2_not : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = not($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_notp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = not($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000100;
+}
+def A2_or : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_or";
+let InputType = "reg";
+let BaseOpcode = "A2_or";
+let isCommutable = 1;
+let isPredicable = 1;
+}
+def A2_orir : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = or($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, ImmRegRel {
+let Inst{31-22} = 0b0111011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_or";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def A2_orp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = or($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let isCommutable = 1;
+}
+def A2_paddf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_paddfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_paddif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if (!$Pu4) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011101001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_paddifnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-23} = 0b011101001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_paddit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if ($Pu4) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011101000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_padditnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-23} = 0b011101000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_paddt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_paddtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_pandf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_and";
+}
+def A2_pandfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_and";
+}
+def A2_pandt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_and";
+}
+def A2_pandtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_and";
+}
+def A2_porf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_or";
+}
+def A2_porfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_or";
+}
+def A2_port : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_or";
+}
+def A2_portnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_or";
+}
+def A2_psubf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sub";
+}
+def A2_psubfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sub";
+}
+def A2_psubt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sub";
+}
+def A2_psubtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sub";
+}
+def A2_pxorf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_xor";
+}
+def A2_pxorfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_xor";
+}
+def A2_pxort : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_xor";
+}
+def A2_pxortnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_xor";
+}
+def A2_roundsat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = round($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = sat($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_satb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = satb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_sath : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sath($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_satub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = satub($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_satuh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = satuh($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_sub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_sub";
+let InputType = "reg";
+let BaseOpcode = "A2_sub";
+let isPredicable = 1;
+}
+def A2_subh_h16_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_sat_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_h16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_h16_sat_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_h16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_l16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_l16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_l16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_l16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = sub($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_subri : HInst<
+(outs IntRegs:$Rd32),
+(ins s32_0Imm:$Ii, IntRegs:$Rs32),
+"$Rd32 = sub(#$Ii,$Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, PredNewRel, ImmRegRel {
+let Inst{31-22} = 0b0111011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_sub";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def A2_subsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32,$Rs32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+}
+def A2_svaddh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vaddh($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svaddhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vaddh($Rs32,$Rt32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svadduhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vadduh($Rs32,$Rt32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svavgh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vavgh($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svavghs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vavgh($Rs32,$Rt32):rnd",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svnavgh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vnavgh($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_svsubh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vsubh($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_svsubhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vsubh($Rt32,$Rs32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+}
+def A2_svsubuhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vsubuh($Rt32,$Rs32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+}
+def A2_swiz : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = swiz($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_sxtb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxtb";
+let isPredicable = 1;
+}
+def A2_sxth : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxth";
+let isPredicable = 1;
+}
+def A2_sxtw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = sxtw($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000100010;
+}
+def A2_tfr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPredicable = 1;
+}
+def A2_tfrcrr : HInst<
+(outs IntRegs:$Rd32),
+(ins CtrRegs:$Cs32),
+"$Rd32 = $Cs32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_1539665 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_tfrf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_tfrfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_tfrih : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
+"$Rx32.h = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_6130414 {
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def A2_tfril : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
+"$Rx32.l = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_6130414 {
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def A2_tfrp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let BaseOpcode = "A2_tfrp";
+let isPredicable = 1;
+let isPseudo = 1;
+}
+def A2_tfrpf : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if (!$Pu4) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrpfnew : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if (!$Pu4.new) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrpi : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s8_0Imm:$Ii),
+"$Rdd32 = #$Ii",
+ALU64_tc_1_SLOT23, TypeALU64> {
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+let isMoveImm = 1;
+let isPseudo = 1;
+}
+def A2_tfrpt : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if ($Pu4) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrptnew : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if ($Pu4.new) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrrcr : HInst<
+(outs CtrRegs:$Cd32),
+(ins IntRegs:$Rs32),
+"$Cd32 = $Rs32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_9018141 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_tfrsi : HInst<
+(outs IntRegs:$Rd32),
+(ins s32_0Imm:$Ii),
+"$Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_7971062, PredNewRel, ImmRegRel {
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isPredicable = 1;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def A2_tfrt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_tfrtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_vabsh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000010;
+}
+def A2_vabshsat : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsh($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000010;
+let Defs = [USR_OVF];
+}
+def A2_vabsw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsw($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000010;
+}
+def A2_vabswsat : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsw($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000010;
+let Defs = [USR_OVF];
+}
+def A2_vaddb_map : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddb($Rss32,$Rtt32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_vaddh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddh($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+}
+def A2_vaddhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddh($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vaddub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddub($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+}
+def A2_vaddubs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddub($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vadduhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vadduh($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vaddw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddw($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+}
+def A2_vaddws : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddw($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vavgh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgh($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavghcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgh($Rss32,$Rtt32):crnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
+}
+def A2_vavghr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgh($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavgub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgub($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavgubr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgub($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavguh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguh($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavguhr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguh($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavguw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguw($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_vavguwr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguw($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_vavgw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgw($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_vavgwcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgw($Rss32,$Rtt32):crnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
+}
+def A2_vavgwr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgw($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_vcmpbeq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpb.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b110000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpbgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpb.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b111000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpheq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmph.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmphgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmph.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmphgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmph.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpweq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpw.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpwgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpw.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpwgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpw.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vconj : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vconj($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000100;
+let Defs = [USR_OVF];
+}
+def A2_vmaxb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxb($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxub($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxuh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxuh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxuw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxuw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vmaxw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vminb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminb($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vminh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminub($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminuh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminuh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminuw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminuw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vnavgh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgh($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+}
+def A2_vnavghcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vnavghr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vnavgw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgw($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+}
+def A2_vnavgwcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vnavgwr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vraddub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vraddub($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def A2_vraddub_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vraddub($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A2_vrsadub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrsadub($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def A2_vrsadub_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrsadub($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A2_vsubb_map : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vsubb($Rss32,$Rtt32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_vsubh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubh($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_vsubhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubh($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_vsubub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubub($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_vsububs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubub($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_vsubuhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubuh($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_vsubw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubw($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_vsubws : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubw($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_xor : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let BaseOpcode = "A2_xor";
+let isCommutable = 1;
+let isPredicable = 1;
+}
+def A2_xorp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = xor($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let isCommutable = 1;
+}
+def A2_zxtb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxtb";
+let isPredicable = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_zxth : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxth";
+let isPredicable = 1;
+}
+def A4_addp_c : HInst<
+(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
+"$Rdd32 = add($Rss32,$Rtt32,$Px4):carry",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_151014 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010110;
+let isPredicateLate = 1;
+let Constraints = "$Px4 = $Px4in";
+}
+def A4_andn : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = and($Rt32,~$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A4_andnp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = and($Rtt32,~$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+}
+def A4_bitsplit : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = bitsplit($Rs32,$Rt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010100001;
+}
+def A4_bitspliti : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rdd32 = bitsplit($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_5654851 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001000110;
+}
+def A4_boundscheck : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Pd4 = boundscheck($Rs32,$Rtt32)",
+M_tc_3x_SLOT23, TypeALU64> {
+let isPseudo = 1;
+}
+def A4_boundscheck_hi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
+def A4_boundscheck_lo : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
+def A4_cmpbeq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmpb.eq($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b110000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpbeq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def A4_cmpbeqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u8_0Imm:$Ii),
+"$Pd4 = cmpb.eq($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101000;
+let CextOpcode = "A4_cmpbeq";
+let InputType = "imm";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def A4_cmpbgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmpb.gt($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpbgt";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmpbgti : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s8_0Imm:$Ii),
+"$Pd4 = cmpb.gt($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101001;
+let CextOpcode = "A4_cmpbgt";
+let InputType = "imm";
+let isCompare = 1;
+}
+def A4_cmpbgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmpb.gtu($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b111000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpbgtu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmpbgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = cmpb.gtu($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3531000, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011101010;
+let CextOpcode = "A4_cmpbgtu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 0;
+}
+def A4_cmpheq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmph.eq($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpheq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def A4_cmpheqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmph.eq($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101000;
+let CextOpcode = "A4_cmpheq";
+let InputType = "imm";
+let isCommutable = 1;
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_cmphgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmph.gt($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmphgt";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmphgti : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmph.gt($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101001;
+let CextOpcode = "A4_cmphgt";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_cmphgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmph.gtu($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmphgtu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmphgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = cmph.gtu($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3531000, ImmRegRel {
+let Inst{4-2} = 0b010;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011101010;
+let CextOpcode = "A4_cmphgtu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 0;
+}
+def A4_combineii : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s8_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = combine(#$Ii,#$II)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9864697 {
+let Inst{31-21} = 0b01111100100;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def A4_combineir : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s32_0Imm:$Ii, IntRegs:$Rs32),
+"$Rdd32 = combine(#$Ii,$Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_2462143 {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011001;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_combineri : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rdd32 = combine($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_2462143 {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011000;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_cround_ri : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = cround($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_cround_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cround($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_ext : HInst<
+(outs),
+(ins u26_6Imm:$Ii),
+"immext(#$Ii)",
+EXTENDER_tc_1_SLOT0123, TypeEXTENDER>, Enc_2082956 {
+let Inst{31-28} = 0b0000;
+}
+def A4_modwrapu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = modwrap($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_orn : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = or($Rt32,~$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A4_ornp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = or($Rtt32,~$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+}
+def A4_paslhf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_aslh";
+}
+def A4_paslhfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_aslh";
+}
+def A4_paslht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_aslh";
+}
+def A4_paslhtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_aslh";
+}
+def A4_pasrhf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_asrh";
+}
+def A4_pasrhfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_asrh";
+}
+def A4_pasrht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_asrh";
+}
+def A4_pasrhtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_asrh";
+}
+def A4_psxtbf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxtbfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxtbt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxtbtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxthf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxth";
+}
+def A4_psxthfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxth";
+}
+def A4_psxtht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxth";
+}
+def A4_psxthtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxth";
+}
+def A4_pzxtbf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxtbfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxtbt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxtbtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxthf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxth";
+}
+def A4_pzxthfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxth";
+}
+def A4_pzxtht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxth";
+}
+def A4_pzxthtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxth";
+}
+def A4_rcmpeq : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpeq";
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A4_rcmpeqi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_16355964, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpeqi";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_rcmpneq : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = !cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpneq";
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A4_rcmpneqi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = !cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_16355964, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpeqi";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_round_ri : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = round($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_round_ri_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = round($Rs32,#$Ii):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A4_round_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = round($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_round_rr_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = round($Rs32,$Rt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A4_subp_c : HInst<
+(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
+"$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_151014 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010111;
+let isPredicateLate = 1;
+let Constraints = "$Px4 = $Px4in";
+}
+def A4_tfrcpp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins CtrRegs64:$Css32),
+"$Rdd32 = $Css32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_13094118 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101000000;
+}
+def A4_tfrpcp : HInst<
+(outs CtrRegs64:$Cdd32),
+(ins DoubleRegs:$Rss32),
+"$Cdd32 = $Rss32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_1329520 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100011001;
+}
+def A4_tlbmatch : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Pd4 = tlbmatch($Rss32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2492727 {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+let isPredicateLate = 1;
+}
+def A4_vcmpbeq_any : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
+def A4_vcmpbeqi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u8_0Imm:$Ii),
+"$Pd4 = vcmpb.eq($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100000;
+}
+def A4_vcmpbgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpb.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
+def A4_vcmpbgti : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmpb.gt($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100001;
+}
+def A4_vcmpbgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
+"$Pd4 = vcmpb.gtu($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 {
+let Inst{4-2} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011100010;
+}
+def A4_vcmpheqi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmph.eq($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100000;
+}
+def A4_vcmphgti : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmph.gt($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100001;
+}
+def A4_vcmphgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
+"$Pd4 = vcmph.gtu($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 {
+let Inst{4-2} = 0b010;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011100010;
+}
+def A4_vcmpweqi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmpw.eq($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100000;
+}
+def A4_vcmpwgti : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmpw.gt($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100001;
+}
+def A4_vcmpwgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
+"$Pd4 = vcmpw.gtu($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 {
+let Inst{4-2} = 0b100;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011100010;
+}
+def A4_vrmaxh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrmaxuh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxuh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrmaxuw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxuw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrmaxw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminuh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminuh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminuw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminuw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A5_ACS : HInst<
+(outs DoubleRegs:$Rxx32, PredRegs:$Pe4),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)",
+M_tc_3stall_SLOT23, TypeM>, Enc_12822813, Requires<[HasV55T]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A5_vaddhubs : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9277990, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A6_vminub_RdP : HInst<
+(outs DoubleRegs:$Rdd32, PredRegs:$Pe4),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_766909, Requires<[HasV62T]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+}
+def C2_all8 : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = all8($Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-18} = 0b01101011101000;
+}
+def C2_and : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = and($Pt4,$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011000000;
+}
+def C2_andn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = and($Pt4,!$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011011000;
+}
+def C2_any8 : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = any8($Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-18} = 0b01101011100000;
+}
+def C2_bitsclr : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = bitsclr($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111100;
+}
+def C2_bitsclri : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u6_0Imm:$Ii),
+"$Pd4 = bitsclr($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_14574598 {
+let Inst{7-2} = 0b000000;
+let Inst{31-21} = 0b10000101100;
+}
+def C2_bitsset : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = bitsset($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111010;
+}
+def C2_ccombinewf : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let BaseOpcode = "A2_combinew";
+}
+def C2_ccombinewnewf : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_combinew";
+}
+def C2_ccombinewnewt : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_combinew";
+}
+def C2_ccombinewt : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let BaseOpcode = "A2_combinew";
+}
+def C2_cmoveif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if (!$Pu4) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmoveit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if ($Pu4) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmovenewif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if (!$Pu4.new) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmovenewit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if ($Pu4.new) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmpeq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010000;
+let CextOpcode = "C2_cmpeq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def C2_cmpeqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{31-22} = 0b0111010100;
+let CextOpcode = "C2_cmpeq";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def C2_cmpeqp : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = cmp.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010100;
+let isCommutable = 1;
+let isCompare = 1;
+}
+def C2_cmpgei : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s8_0Imm:$Ii),
+"$Pd4 = cmp.ge($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> {
+let isCompare = 1;
+let isPseudo = 1;
+}
+def C2_cmpgeui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u8_0Imm:$Ii),
+"$Pd4 = cmp.geu($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> {
+let isCompare = 1;
+let isPseudo = 1;
+}
+def C2_cmpgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.gt($Rs32,$Rt32)",
+ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010010;
+let CextOpcode = "C2_cmpgt";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C2_cmpgti : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmp.gt($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{31-22} = 0b0111010101;
+let CextOpcode = "C2_cmpgt";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def C2_cmpgtp : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = cmp.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010100;
+let isCompare = 1;
+}
+def C2_cmpgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.gtu($Rs32,$Rt32)",
+ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010011;
+let CextOpcode = "C2_cmpgtu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C2_cmpgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = cmp.gtu($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_13249928, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{31-21} = 0b01110101100;
+let CextOpcode = "C2_cmpgtu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 0;
+}
+def C2_cmpgtup : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = cmp.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010100;
+let isCompare = 1;
+}
+def C2_cmplt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.lt($Rs32,$Rt32)",
+PSEUDO, TypeALU32_3op> {
+let isCompare = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def C2_cmpltu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.ltu($Rs32,$Rt32)",
+PSEUDO, TypeALU32_3op> {
+let isCompare = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def C2_mask : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4),
+"$Rdd32 = mask($Pt4)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_10328975 {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0000;
+let Inst{31-16} = 0b1000011000000000;
+}
+def C2_mux : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mux($Pu4,$Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def C2_muxii : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II),
+"$Rd32 = mux($Pu4,#$Ii,#$II)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9093094 {
+let Inst{31-25} = 0b0111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def C2_muxir : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = mux($Pu4,$Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def C2_muxri : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32),
+"$Rd32 = mux($Pu4,#$Ii,$Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def C2_not : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = not($Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-18} = 0b01101011110000;
+}
+def C2_or : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = or($Pt4,$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011001000;
+}
+def C2_orn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = or($Pt4,!$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011111000;
+}
+def C2_pxfer_map : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = $Ps4",
+S_2op_tc_1_SLOT23, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def C2_tfrpr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Ps4),
+"$Rd32 = $Ps4",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_11139981 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-18} = 0b10001001010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def C2_tfrrp : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32),
+"$Pd4 = $Rs32",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_4527648 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-21} = 0b10000101010;
+}
+def C2_vitpack : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Rd32 = vitpack($Ps4,$Pt4)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_6735062 {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b10001001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def C2_vmux : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_7606379 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010001000;
+}
+def C2_xor : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Pd4 = xor($Ps4,$Pt4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011010000;
+}
+def C4_addipc : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = add(pc,#$Ii)",
+CR_tc_2_SLOT3, TypeCR>, Enc_9554661 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0110101001001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def C4_and_and : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,and($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011000100;
+}
+def C4_and_andn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,and($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011100100;
+}
+def C4_and_or : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,or($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011001100;
+}
+def C4_and_orn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,or($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011101100;
+}
+def C4_cmplte : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !cmp.gt($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010010;
+let CextOpcode = "C4_cmplte";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C4_cmpltei : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = !cmp.gt($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b100;
+let Inst{31-22} = 0b0111010101;
+let CextOpcode = "C4_cmplte";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def C4_cmplteu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !cmp.gtu($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010011;
+let CextOpcode = "C4_cmplteu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C4_cmplteui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = !cmp.gtu($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_13249928, ImmRegRel {
+let Inst{4-2} = 0b100;
+let Inst{31-21} = 0b01110101100;
+let CextOpcode = "C4_cmplteu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 0;
+}
+def C4_cmpneq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010000;
+let CextOpcode = "C4_cmpneq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def C4_cmpneqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = !cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b100;
+let Inst{31-22} = 0b0111010100;
+let CextOpcode = "C4_cmpneq";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def C4_fastcorner9 : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Pd4 = fastcorner9($Ps4,$Pt4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 {
+let Inst{7-2} = 0b100100;
+let Inst{13-10} = 0b1000;
+let Inst{31-18} = 0b01101011000000;
+}
+def C4_fastcorner9_not : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Pd4 = !fastcorner9($Ps4,$Pt4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 {
+let Inst{7-2} = 0b100100;
+let Inst{13-10} = 0b1000;
+let Inst{31-18} = 0b01101011000100;
+}
+def C4_nbitsclr : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !bitsclr($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111101;
+}
+def C4_nbitsclri : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u6_0Imm:$Ii),
+"$Pd4 = !bitsclr($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_14574598 {
+let Inst{7-2} = 0b000000;
+let Inst{31-21} = 0b10000101101;
+}
+def C4_nbitsset : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !bitsset($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111011;
+}
+def C4_or_and : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,and($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011010100;
+}
+def C4_or_andn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,and($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011110100;
+}
+def C4_or_or : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,or($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011011100;
+}
+def C4_or_orn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,or($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011111100;
+}
+def F2_conv_d2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_d2df($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000011;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_d2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_d2sf($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2d : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2d($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2d_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2d($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2sf($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2ud : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2ud($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2ud_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2ud($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2uw : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2uw($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2uw_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2uw($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2w : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2w($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2w_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2w($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2d : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2d($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2d_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2d($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2df($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2ud : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2ud($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000011;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2ud_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2ud($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2uw : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2uw($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2uw_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2uw($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2w : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2w($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2w_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2w($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_ud2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_ud2df($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_ud2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_ud2sf($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_uw2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_uw2df($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_uw2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_uw2sf($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_w2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_w2df($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_w2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_w2sf($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_dfclass : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Pd4 = dfclass($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_14400220, Requires<[HasV5T]> {
+let Inst{4-2} = 0b100;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b11011100100;
+let isFP = 1;
+let Uses = [USR];
+}
+def F2_dfcmpeq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfcmpge : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfcmpgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfcmpuo : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfimm_n : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u10_0Imm:$Ii),
+"$Rdd32 = dfmake(#$Ii):neg",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_2702036, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101100101;
+let prefersSlot3 = 1;
+}
+def F2_dfimm_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u10_0Imm:$Ii),
+"$Rdd32 = dfmake(#$Ii):pos",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_2702036, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101100100;
+let prefersSlot3 = 1;
+}
+def F2_sfadd : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfadd($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let isCommutable = 1;
+}
+def F2_sfclass : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Pd4 = sfclass($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742, Requires<[HasV5T]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000101111;
+let isFP = 1;
+let Uses = [USR];
+}
+def F2_sfcmpeq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sfcmpge : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sfcmpgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sfcmpuo : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sffixupd : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sffixupd($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+}
+def F2_sffixupn : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sffixupn($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+}
+def F2_sffixupr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sffixupr($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+}
+def F2_sffma : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += sfmpy($Rs32,$Rt32)",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffma_lib : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += sfmpy($Rs32,$Rt32):lib",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffma_sc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
+"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_15194851, Requires<[HasV5T]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffms : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= sfmpy($Rs32,$Rt32)",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffms_lib : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sfimm_n : HInst<
+(outs IntRegs:$Rd32),
+(ins u10_0Imm:$Ii),
+"$Rd32 = sfmake(#$Ii):neg",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_9082775, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def F2_sfimm_p : HInst<
+(outs IntRegs:$Rd32),
+(ins u10_0Imm:$Ii),
+"$Rd32 = sfmake(#$Ii):pos",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_9082775, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def F2_sfinvsqrta : HInst<
+(outs IntRegs:$Rd32, PredRegs:$Pe4),
+(ins IntRegs:$Rs32),
+"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_5718302, Requires<[HasV5T]> {
+let Inst{13-7} = 0b0000000;
+let Inst{31-21} = 0b10001011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+}
+def F2_sfmax : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfmax($Rs32,$Rt32)",
+M_tc_2_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_sfmin : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfmin($Rs32,$Rt32)",
+M_tc_2_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_sfmpy : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfmpy($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let isCommutable = 1;
+}
+def F2_sfrecipa : HInst<
+(outs IntRegs:$Rd32, PredRegs:$Pe4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_5853469, Requires<[HasV5T]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+}
+def F2_sfsub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfsub($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def J2_call : HInst<
+(outs),
+(ins a30_2Imm:$Ii),
+"call $Ii",
+J_tc_2early_SLOT23, TypeJ>, Enc_13453446, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{31-25} = 0b0101101;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let BaseOpcode = "J2_call";
+let isPredicable = 1;
+let hasSideEffects = 1;
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 24;
+let opExtentAlign = 2;
+}
+def J2_callf : HInst<
+(outs),
+(ins PredRegs:$Pu4, a30_2Imm:$Ii),
+"if (!$Pu4) call $Ii",
+J_tc_2early_SLOT23, TypeJ>, Enc_14868535, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let BaseOpcode = "J2_call";
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_callr : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"callr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010000101;
+let cofMax1 = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let hasSideEffects = 1;
+}
+def J2_callrf : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) callr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let cofMax1 = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+}
+def J2_callrt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) callr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010001000;
+let isPredicated = 1;
+let cofMax1 = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+}
+def J2_callt : HInst<
+(outs),
+(ins PredRegs:$Pu4, a30_2Imm:$Ii),
+"if ($Pu4) call $Ii",
+J_tc_2early_SLOT23, TypeJ>, Enc_14868535, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011101;
+let isPredicated = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let BaseOpcode = "J2_call";
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_endloop0 : HInst<
+(outs),
+(ins),
+"endloop0",
+PSEUDO, TypeJ> {
+let Uses = [LC0, SA0];
+let Defs = [LC0, P3, PC, USR];
+let isPseudo = 1;
+}
+def J2_endloop01 : HInst<
+(outs),
+(ins),
+"endloop01",
+PSEUDO, TypeJ> {
+let Uses = [LC0, LC1, SA0, SA1];
+let Defs = [LC0, LC1, P3, PC, USR];
+let isPseudo = 1;
+}
+def J2_endloop1 : HInst<
+(outs),
+(ins),
+"endloop1",
+PSEUDO, TypeJ> {
+let Uses = [LC1, SA1];
+let Defs = [LC1, PC];
+let isPseudo = 1;
+}
+def J2_jump : HInst<
+(outs),
+(ins b30_2Imm:$Ii),
+"jump $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_13453446, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{31-25} = 0b0101100;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isBarrier = 1;
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 24;
+let opExtentAlign = 2;
+}
+def J2_jumpf : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpf_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, b15_2Imm:$Ii),
+"if (!$Pu4) jump $Ii",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumpfnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4.new) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b010;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpfnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4.new) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b110;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpfpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, Requires<[HasV60T]>, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b100;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpr : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"jumpr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059, PredNewRel {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010010100;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isBarrier = 1;
+let isPredicable = 1;
+}
+def J2_jumprf : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprf_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) jumpr $Rs32",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumprfnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0010;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprfnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0110;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprfpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, Requires<[HasV60T]>, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0100;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprgtez : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32>=#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000101;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprgtezpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32>=#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000101;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprltez : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32<=#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000111;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprltezpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32<=#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000111;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprnz : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32==#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprnzpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32==#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprt_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) jumpr $Rs32",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumprtnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0010;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprtnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0110;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprtpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, Requires<[HasV60T]>, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0100;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprz : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32!=#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprzpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32!=#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpt_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, b15_2Imm:$Ii),
+"if ($Pu4) jump $Ii",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumptnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4.new) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b010;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumptnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4.new) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b110;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumptpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, Requires<[HasV60T]>, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b100;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_loop0i : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"loop0($Ii,#$II)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001000;
+let Defs = [LC0, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_loop0r : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"loop0($Ii,$Rs32)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000000;
+let Defs = [LC0, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_loop1i : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"loop1($Ii,#$II)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001001;
+let Defs = [LC1, SA1];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_loop1r : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"loop1($Ii,$Rs32)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000001;
+let Defs = [LC1, SA1];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_pause : HInst<
+(outs),
+(ins u8_0Imm:$Ii),
+"pause(#$Ii)",
+J_tc_2early_SLOT2, TypeJ>, Enc_8732960 {
+let Inst{1-0} = 0b00;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0101010001000000;
+let isSolo = 1;
+}
+def J2_ploop1si : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"p3 = sp1loop0($Ii,#$II)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001101;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop1sr : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"p3 = sp1loop0($Ii,$Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000101;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop2si : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"p3 = sp2loop0($Ii,#$II)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001110;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop2sr : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"p3 = sp2loop0($Ii,$Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000110;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop3si : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"p3 = sp3loop0($Ii,#$II)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001111;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop3sr : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"p3 = sp3loop0($Ii,$Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000111;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_trap0 : HInst<
+(outs),
+(ins u8_0Imm:$Ii),
+"trap0(#$Ii)",
+J_tc_2early_SLOT2, TypeJ>, Enc_8732960 {
+let Inst{1-0} = 0b00;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0101010000000000;
+let isSolo = 1;
+}
+def J4_cmpeq_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4359901, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8612939, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_844699, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5338033, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14150875, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_15450971, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_14998517, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_11544269, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5401217, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12419313, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_4684887, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_220949, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8674673, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15763937, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5915771, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7315939, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7785569, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_10968391, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_364753, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8479583, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_2428539, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_8919369, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_8577055, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14530015, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmplt_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmplt_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmplt_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmplt_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_hintjumpr : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"hintjr($Rs32)",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010010101;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+}
+def J4_jumpseti : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u6_0Imm:$II, b30_2Imm:$Ii),
+"$Rd16 = #$II ; jump $Ii",
+COMPOUND, TypeCJ>, Enc_4834775 {
+let Inst{0-0} = 0b0;
+let Inst{31-22} = 0b0001011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_jumpsetr : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"$Rd16 = $Rs16 ; jump $Ii",
+COMPOUND, TypeCJ>, Enc_2639299 {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!tstbit($Ns8.new,#0)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!tstbit($Ns8.new,#0)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (tstbit($Ns8.new,#0)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (tstbit($Ns8.new,#0)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def L2_deallocframe : HInst<
+(outs),
+(ins),
+"deallocframe",
+LD_tc_ld_SLOT01, TypeLD>, Enc_0 {
+let Inst{4-0} = 0b11110;
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10010000000;
+let Inst{20-16} = 0b11110;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [R29, R30, R31];
+}
+def L2_loadalignb_io : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Ryy32 = memb_fifo($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_449439 {
+let Inst{24-21} = 0b0100;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadalignb_pbr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110100;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pci : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_971347 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pcr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pi : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"$Ryy32 = memb_fifo($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6372758 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_zomap : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
+"$Ryy32 = memb_fifo($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadalignh_io : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Ryy32 = memh_fifo($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_11930027 {
+let Inst{24-21} = 0b0010;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadalignh_pbr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110010;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pci : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_1971351 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pcr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pi : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Ryy32 = memh_fifo($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_3372766 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_zomap : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
+"$Ryy32 = memh_fifo($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadbsw2_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = membh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738 {
+let Inst{24-21} = 0b0001;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadbsw2_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = membh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = membh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadbsw4_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii),
+"$Rdd32 = membh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_9852473 {
+let Inst{24-21} = 0b0111;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def L2_loadbsw4_pbr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110111;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pci : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_3931661 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pcr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
+"$Rdd32 = membh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_8752140 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = membh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadbzw2_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memubh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738 {
+let Inst{24-21} = 0b0011;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadbzw2_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = memubh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memubh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadbzw4_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii),
+"$Rdd32 = memubh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_9852473 {
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def L2_loadbzw4_pbr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110101;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pci : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_3931661 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pcr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
+"$Rdd32 = memubh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_8752140 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = memubh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrb_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = memb($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_14461004, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def L2_loadrb_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_16303398 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
+"$Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5598813, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrbgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memb(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrb_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def L2_loadrd_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s29_3Imm:$Ii),
+"$Rdd32 = memd($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_163381, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 14;
+let opExtentAlign = 3;
+}
+def L2_loadrd_pbr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111110;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pci : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_931653 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pcr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii),
+"$Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_9752128, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrdgp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u29_3Imm:$Ii),
+"$Rdd32 = memd(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4975051, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b01001;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrd_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def L2_loadrh_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadrh_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrhgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memh(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrh_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def L2_loadri_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii),
+"$Rd32 = memw($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_8990840, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def L2_loadri_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_14303394 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
+"$Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_16376009, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrigp : HInst<
+(outs IntRegs:$Rd32),
+(ins u30_2Imm:$Ii),
+"$Rd32 = memw(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_8814718, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadri_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
+def L2_loadrub_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = memub($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_14461004, AddrModeRel {
+let Inst{24-21} = 0b1001;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def L2_loadrub_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_16303398 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
+"$Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5598813, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrubgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memub(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1001;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrub_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def L2_loadruh_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memuh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadruh_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadruhgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memuh(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadruh_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def L2_loadw_locked : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memw_locked($Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_4075554 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10010010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = WordAccess;
+let isSoloAX = 1;
+let mayLoad = 1;
+}
+def L2_ploadrbf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrbfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrbt_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbt_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbt_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrbtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdf_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdf_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdf_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdfnew_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdfnew_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdfnew_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdt_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdt_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdt_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdtnew_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdtnew_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdtnew_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrhf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrhf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrhf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrhfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrhfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrhfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrht_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrht_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrht_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrhtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrhtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrhtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrif_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadrif_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrif_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrifnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadrifnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrifnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrit_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if ($Pt4) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadrit_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if ($Pt4) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrit_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadritnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadritnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadritnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubt_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubt_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubt_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruhf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruhf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruhf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruhfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruhfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruhfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruht_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruht_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruht_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruhtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruhtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruhtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_add_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) += $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_add_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) += $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_add_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) += $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_add_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) += $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_add_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) += $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_add_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) += $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_and_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) &= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_and_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) &= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_and_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) &= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_and_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) &= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_and_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) &= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_and_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) &= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iadd_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) += #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_iadd_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) += #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iadd_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) += #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_iadd_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) += #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iadd_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) += #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_iadd_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) += #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iand_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) = clrbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_iand_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) = clrbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iand_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) = clrbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_iand_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) = clrbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iand_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) = clrbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_iand_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) = clrbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ior_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) = setbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ior_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) = setbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ior_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) = setbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_ior_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) = setbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ior_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) = setbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_ior_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) = setbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_isub_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) -= #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_isub_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) -= #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_isub_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) -= #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_isub_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) -= #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_isub_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) -= #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_isub_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) -= #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_loadalignb_ap : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
+(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
+"$Ryy32 = memb_fifo($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_11047413 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010100;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadalignb_ur : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_7303598 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100100;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 4;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadalignh_ap : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
+(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
+"$Ryy32 = memh_fifo($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_11047413 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010010;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadalignh_ur : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_7303598 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100010;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 4;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadbsw2_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = membh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbsw2_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = membh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbsw4_ap : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rdd32 = membh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_877823 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010111;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbsw4_ur : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = membh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_5582416 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100111;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw2_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memubh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw2_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memubh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw4_ap : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rdd32 = memubh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_877823 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010101;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw4_ur : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = memubh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_5582416 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100101;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadd_locked : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = memd_locked($Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_4030179 {
+let Inst{13-5} = 0b010000000;
+let Inst{31-21} = 0b10010010000;
+let accessSize = DoubleWordAccess;
+let isSoloAX = 1;
+let mayLoad = 1;
+}
+def L4_loadrb_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memb($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrb_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+let isPredicable = 1;
+}
+def L4_loadrb_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memb($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrd_ap : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rdd32 = memd($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_877823 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011110;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrd_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7581852, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010110;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+let isPredicable = 1;
+}
+def L4_loadrd_ur : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = memd($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_5582416, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101110;
+let addrMode = BaseLongOffset;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrh_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrh_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+let isPredicable = 1;
+}
+def L4_loadrh_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadri_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memw($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadri_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+let isPredicable = 1;
+}
+def L4_loadri_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memw($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrub_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memub($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrub_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+let isPredicable = 1;
+}
+def L4_loadrub_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memub($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadruh_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memuh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadruh_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+let isPredicable = 1;
+}
+def L4_loadruh_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memuh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_or_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) |= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_or_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) |= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_or_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) |= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_or_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) |= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_or_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) |= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_or_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) |= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ploadrbf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrbfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrbt_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbt_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrbtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrdf_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdf_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrdfnew_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdfnew_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrdt_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdt_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110000110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrdtnew_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdtnew_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110010110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrhf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrhf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrhfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrhfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrht_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrht_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrhtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrhtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrif_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrif_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
+def L4_ploadrifnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrifnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
+def L4_ploadrit_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrit_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
+def L4_ploadritnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadritnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
+def L4_ploadrubf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadrubfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadrubt_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubt_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadrubtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadruhf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruhf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
+def L4_ploadruhfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruhfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
+def L4_ploadruht_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruht_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
+def L4_ploadruhtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruhtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
+def L4_return : HInst<
+(outs),
+(ins),
+"dealloc_return",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_0, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isBarrier = 1;
+let isPredicable = 1;
+let isTaken = 1;
+}
+def L4_return_f : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if (!$Pv4) dealloc_return",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1100;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_fnew_pnt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if (!$Pv4.new) dealloc_return:nt",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_fnew_pt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if (!$Pv4.new) dealloc_return:t",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1110;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_t : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if ($Pv4) dealloc_return",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0100;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_tnew_pnt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if ($Pv4.new) dealloc_return:nt",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0010;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_tnew_pt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if ($Pv4.new) dealloc_return:t",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0110;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_sub_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) -= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_sub_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) -= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_sub_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) -= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_sub_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) -= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_sub_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) -= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_sub_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) -= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def M2_acci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += add($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889, ImmRegRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_acci";
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_accii : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 += add($Rs32,#$Ii)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_11522288, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_acci";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_cmaci_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpyi($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacr_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpyr($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacsc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32*):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacsc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmpyi_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpyi($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+}
+def M2_cmpyr_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpyr($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+}
+def M2_cmpyrs_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpyrs_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpyrsc_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpyrsc_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpys_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpys_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpysc_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32*):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpysc_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cnacs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cnacs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cnacsc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32*):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cnacsc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyss_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyss_nac_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyss_rnd_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_dpmpyss_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+}
+def M2_dpmpyuu_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyuu_nac_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyuu_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+}
+def M2_hmmpyh_rs1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_hmmpyh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_hmmpyl_rs1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_hmmpyl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_maci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyi($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_maci";
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_macsin : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rx32 -= mpyi($Rs32,#$Ii)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_11522288 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_macsip : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rx32 += mpyi($Rs32,#$Ii)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_11522288, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_maci";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mmachs_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmachs_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmachs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmachs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmpyh_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyh_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_acc_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_nac_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_rnd_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_sat_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_up : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_up_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_up_s1_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpyd_acc_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_hh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_hh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_hl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_hl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_lh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_lh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_ll_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_ll_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_nac_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_rnd_hh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_hh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_hl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_hl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_lh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_lh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_ll_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_ll_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyi($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_mpyi";
+let InputType = "reg";
+}
+def M2_mpysin : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u8_0Imm:$Ii),
+"$Rd32 = -mpyi($Rs32,#$Ii)",
+M_tc_3x_SLOT23, TypeM>, Enc_16355964 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpysip : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rd32 = +mpyi($Rs32,#$Ii)",
+M_tc_3x_SLOT23, TypeM>, Enc_16355964 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def M2_mpysmi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, m32_0Imm:$Ii),
+"$Rd32 = mpyi($Rs32,#$Ii)",
+M_tc_3x_SLOT23, TypeM>, ImmRegRel {
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "M2_mpyi";
+let InputType = "imm";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 0;
+}
+def M2_mpysu_up : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpysu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_acc_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_nac_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_up : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_acc_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_hh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_hh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_hl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_hl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_lh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_lh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_ll_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_ll_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_nac_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyui : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyui($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def M2_nacci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= add($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_naccii : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 -= add($Rs32,#$Ii)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_11522288 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100010100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_subacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rx32 += sub($Rt32,$Rs32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_7692963 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_vabsdiffh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffh($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+}
+def M2_vabsdiffw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffw($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+}
+def M2_vcmac_s0_sat_i : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vcmpyi($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vcmac_s0_sat_r : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vcmpyr($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vcmpy_s0_sat_i : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyi($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vcmpy_s0_sat_r : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyr($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vcmpy_s1_sat_i : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vcmpy_s1_sat_r : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmacs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vdmpy($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vdmacs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vdmpyrs_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmpyrs_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmpys_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vdmpy($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmpys_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmac2 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyh($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2es : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyeh($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2es_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyeh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2es_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2s_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyh($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2s_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2su_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyhsu($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2su_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmpy2es_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyeh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2es_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyh($Rs32,$Rt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s0pack : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s1pack : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2su_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyhsu($Rs32,$Rt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2su_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vraddh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vraddh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_vradduh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vradduh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_vrcmaci_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyi($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmaci_s0c : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyi($Rss32,$Rtt32*)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmacr_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyr($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmacr_s0c : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyr($Rss32,$Rtt32*)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpyi_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyi($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+}
+def M2_vrcmpyi_s0c : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyi($Rss32,$Rtt32*)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def M2_vrcmpyr_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyr($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+}
+def M2_vrcmpyr_s0c : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyr($Rss32,$Rtt32*)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+}
+def M2_vrcmpys_acc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM> {
+let isPseudo = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpys_acc_s1_h : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpys_acc_s1_l : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpys_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM> {
+let isPseudo = 1;
+}
+def M2_vrcmpys_s1_h : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vrcmpys_s1_l : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vrcmpys_s1rp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+}
+def M2_vrcmpys_s1rp_h : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vrcmpys_s1rp_l : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vrmac_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpyh($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrmpy_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpyh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+}
+def M2_xor_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= xor($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= and($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_andn : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= and($Rs32,~$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= or($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_xor : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= xor($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_cmpyi_wh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_cmpyi_whc : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645, Requires<[HasV5T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_cmpyr_wh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_cmpyr_whc : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645, Requires<[HasV5T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_mac_up_s1_sat : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_mpyri_addi : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II),
+"$Rd32 = add(#$Ii,mpyi($Rs32,#$II))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_971574, ImmRegRel {
+let Inst{31-24} = 0b11011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyri_addr";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def M4_mpyri_addr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_236434, ImmRegRel {
+let Inst{31-23} = 0b110111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyri_addr";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def M4_mpyri_addr_u2 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32),
+"$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_9959498 {
+let Inst{31-23} = 0b110111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M4_mpyrr_addi : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_2216485, ImmRegRel {
+let Inst{31-23} = 0b110101110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyrr_addr";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def M4_mpyrr_addr : HInst<
+(outs IntRegs:$Ry32),
+(ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32),
+"$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))",
+M_tc_3x_SLOT23, TypeM>, Enc_13770697, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyrr_addr";
+let InputType = "reg";
+let Constraints = "$Ry32 = $Ry32in";
+}
+def M4_nac_up_s1_sat : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= and($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_andn : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= and($Rs32,~$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= or($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_xor : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= xor($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_pmpyw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = pmpyw($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+}
+def M4_pmpyw_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 ^= pmpyw($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vpmpyh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vpmpyh($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101110;
+let prefersSlot3 = 1;
+}
+def M4_vpmpyh_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 ^= vpmpyh($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyeh_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpyweh($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyeh_acc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyeh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpyweh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def M4_vrmpyeh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+}
+def M4_vrmpyoh_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpywoh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyoh_acc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyoh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpywoh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+}
+def M4_vrmpyoh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+}
+def M4_xor_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= and($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_xor_andn : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= and($Rs32,~$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_xor_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= or($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_xor_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 ^= xor($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vdmacbsu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vdmpybsu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M5_vmacbsu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpybsu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vmacbuu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpybu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vmpybsu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpybsu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+}
+def M5_vmpybuu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpybu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+}
+def M5_vrmacbsu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpybsu($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vrmacbuu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpybu($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vrmpybsu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpybsu($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+}
+def M5_vrmpybuu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpybu($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+}
+def M6_vabsdiffb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffb($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333, Requires<[HasV62T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+}
+def M6_vabsdiffub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffub($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333, Requires<[HasV62T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+}
+def PS_loadrbabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memb(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def PS_loadrdabs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u29_3Imm:$Ii),
+"$Rdd32 = memd(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4975051, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def PS_loadrhabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memh(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def PS_loadriabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u30_2Imm:$Ii),
+"$Rd32 = memw(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_8814718, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
+def PS_loadrubabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memub(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1001;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def PS_loadruhabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memuh(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def PS_storerbabs : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_12395768, AddrModeRel {
+let Inst{24-21} = 0b0000;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def PS_storerbnewabs : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Nt8),
+"memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_4050532, AddrModeRel {
+let Inst{12-11} = 0b00;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+let opNewValue = 1;
+}
+def PS_storerdabs : HInst<
+(outs),
+(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_11682941, AddrModeRel {
+let Inst{24-21} = 0b0110;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def PS_storerfabs : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0011;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def PS_storerhabs : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0010;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def PS_storerhnewabs : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Nt8),
+"memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_13618890, AddrModeRel {
+let Inst{12-11} = 0b01;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+let opNewValue = 1;
+}
+def PS_storeriabs : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_15999208, AddrModeRel {
+let Inst{24-21} = 0b0100;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
+def PS_storerinewabs : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Nt8),
+"memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_12297800, AddrModeRel {
+let Inst{12-11} = 0b10;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def S2_addasl_rrri : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii),
+"$Rd32 = addasl($Rt32,$Rs32,#$Ii)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_3494181 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_allocframe : HInst<
+(outs),
+(ins u11_3Imm:$Ii),
+"allocframe(#$Ii)",
+ST_tc_ld_SLOT0, TypeST>, Enc_15830826 {
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10100000100;
+let Inst{20-16} = 0b11101;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [R29, R30, R31];
+let Defs = [R29, R30];
+}
+def S2_asl_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asl($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000000000;
+}
+def S2_asl_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 ^= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asl($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asl_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asl($Rs32,#$Ii):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_asl_i_r_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 ^= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vaslh($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 {
+let Inst{7-5} = 0b010;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000100;
+}
+def S2_asl_i_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rdd32 = vaslw($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000000010;
+}
+def S2_asl_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = asl($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_asl_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asl($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asl_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asl($Rs32,$Rt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_asl_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vaslh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_asl_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vaslw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
+def S2_asr_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asr($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10000000000;
+}
+def S2_asr_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_rnd : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asr($Rss32,#$Ii):rnd",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995, Requires<[HasV5T]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b10000000110;
+let prefersSlot3 = 1;
+}
+def S2_asr_i_p_rnd_goodsyntax : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asrrnd($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Requires<[HasV5T]> {
+let isPseudo = 1;
+}
+def S2_asr_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asr($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_rnd : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asr($Rs32,#$Ii):rnd",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_asr_i_r_rnd_goodsyntax : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asrrnd($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+}
+def S2_asr_i_svw_trun : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rd32 = vasrw($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2380082 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_i_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vasrh($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 {
+let Inst{7-5} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000100;
+}
+def S2_asr_i_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rdd32 = vasrw($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000000010;
+}
+def S2_asr_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = asr($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_asr_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asr($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asr($Rs32,$Rt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_asr_r_svw_trun : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = vasrw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14287645 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vasrh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_asr_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vasrw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
+def S2_brev : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = brev($Rs32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_brevp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = brev($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000110;
+}
+def S2_cabacdecbin : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = decbin($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+let Defs = [P0];
+}
+def S2_cl0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = cl0($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_cl0p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = cl0($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_cl1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = cl1($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_cl1p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = cl1($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = clb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clbnorm : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = normamt($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clbp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = clb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clrbit_i : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = clrbit($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clrbit_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = clrbit($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = ct0($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct0p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = ct0($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = ct1($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct1p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = ct1($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_deinterleave : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = deinterleave($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000110;
+}
+def S2_extractu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rd32 = extractu($Rs32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_11930928 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b100011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_extractu_rp : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rd32 = extractu($Rs32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_15472748 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_extractup : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
+"$Rdd32 = extractu($Rss32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_9894557 {
+let Inst{31-24} = 0b10000001;
+let prefersSlot3 = 1;
+}
+def S2_extractup_rp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = extractu($Rss32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+let prefersSlot3 = 1;
+}
+def S2_insert : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = insert($Rs32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2880796 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b100011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_insert_rp : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rx32 = insert($Rs32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_16311032 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_insertp : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
+"$Rxx32 = insert($Rss32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_631197 {
+let Inst{31-24} = 0b10000011;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_insertp_rp : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 = insert($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001010000;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_interleave : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = interleave($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000110;
+}
+def S2_lfsp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = lfs($Rss32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+let prefersSlot3 = 1;
+}
+def S2_lsl_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = lsl($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_lsl_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = lsl($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_lsl_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlslh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_lsl_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlslw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
+def S2_lsr_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = lsr($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000000000;
+}
+def S2_lsr_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 ^= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = lsr($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_lsr_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 ^= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vlsrh($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 {
+let Inst{7-5} = 0b001;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000100;
+}
+def S2_lsr_i_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rdd32 = vlsrw($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000000010;
+}
+def S2_lsr_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = lsr($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_lsr_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = lsr($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_lsr_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlsrh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_lsr_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlsrw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
+def S2_packhl : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = packhl($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110101100;
+let InputType = "reg";
+}
+def S2_parityp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = parity($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_9277990 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_pstorerbf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S2_pstorerbf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerbfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S2_pstorerbnewf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerbnewfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S2_pstorerbnewt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerbnewtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S2_pstorerbt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerbtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S2_pstorerdf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerdfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S2_pstorerdt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerdtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerff_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerff_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerff_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerffnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerft_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000011;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerft_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerft_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerftnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerhf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerhfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S2_pstorerhnewf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerhnewfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S2_pstorerhnewt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerhnewtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerht_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerht_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerht_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerhtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerif_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S2_pstorerif_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerif_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerifnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S2_pstorerinewf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerinewfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S2_pstorerinewt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerinewtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerit_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000100;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S2_pstorerit_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerit_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstoreritnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_setbit_i : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = setbit($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_setbit_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = setbit($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_shuffeb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = shuffeb($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+}
+def S2_shuffeh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = shuffeh($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+}
+def S2_shuffob : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = shuffob($Rtt32,$Rss32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+}
+def S2_shuffoh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = shuffoh($Rtt32,$Rss32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def S2_storerb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13150110, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isPredicable = 1;
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def S2_storerb_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++$Mu2:brev) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111000;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pbr";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++#$Ii:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_3915770 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++I:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_12492533, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++$Mu2) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerbgp : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb(gp+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_12395768, AddrModeRel {
+let Inst{24-21} = 0b0000;
+let Inst{31-27} = 0b01001;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def S2_storerbnew_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8),
+"memb($Rs32+#$Ii) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10002182, AddrModeRel {
+let Inst{12-11} = 0b00;
+let Inst{24-21} = 0b1101;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S2_storerbnew_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++$Mu2:brev) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101111101;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerb_pbr";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_5326450 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++I:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_5900401, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10101011101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++$Mu2) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101101101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Nt8),
+"memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 1;
+}
+def S2_storerbnewgp : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Nt8),
+"memb(gp+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_4050532, AddrModeRel {
+let Inst{12-11} = 0b00;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+let opNewValue = 1;
+}
+def S2_storerd_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd($Rs32+#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16319737, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 14;
+let opExtentAlign = 3;
+}
+def S2_storerd_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++$Mu2:brev) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_15816255 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111110;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_4501395 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++I:circ($Mu2)) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_15816255 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11271630, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++$Mu2) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_15816255 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerdgp : HInst<
+(outs),
+(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd(gp+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11682941, AddrModeRel {
+let Inst{24-21} = 0b0110;
+let Inst{31-27} = 0b01001;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerdabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def S2_storerf_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7736768, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def S2_storerf_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2:brev) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111011;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_10915758 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++I:circ($Mu2)) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11492529, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerfgp : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(gp+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0011;
+let Inst{31-27} = 0b01001;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerfabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def S2_storerh_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7736768, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isPredicable = 1;
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def S2_storerh_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2:brev) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111010;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pbr";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_10915758 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++I:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11492529, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerhgp : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(gp+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0010;
+let Inst{31-27} = 0b01001;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def S2_storerhnew_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8),
+"memh($Rs32+#$Ii) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_748676, AddrModeRel {
+let Inst{12-11} = 0b01;
+let Inst{24-21} = 0b1101;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+let opNewValue = 2;
+}
+def S2_storerhnew_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++$Mu2:brev) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101111101;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerh_pbr";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10326434 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++I:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000010;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_6900405, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b001;
+let Inst{31-21} = 0b10101011101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let isPredicable = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++$Mu2) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101101101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Nt8),
+"memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 1;
+}
+def S2_storerhnewgp : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Nt8),
+"memh(gp+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_13618890, AddrModeRel {
+let Inst{12-11} = 0b01;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+let opNewValue = 1;
+}
+def S2_storeri_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_6673186, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isPredicable = 1;
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def S2_storeri_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++$Mu2:brev) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111100;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pbr";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++#$Ii:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_9915754 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++I:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10492541, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++$Mu2) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerigp : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw(gp+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_15999208, AddrModeRel {
+let Inst{24-21} = 0b0100;
+let Inst{31-27} = 0b01001;
+let accessSize = WordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
+def S2_storerinew_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8),
+"memw($Rs32+#$Ii) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_8409782, AddrModeRel {
+let Inst{12-11} = 0b10;
+let Inst{24-21} = 0b1101;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+let opNewValue = 2;
+}
+def S2_storerinew_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++$Mu2:brev) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101111101;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storeri_pbr";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_11326438 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++I:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000010;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_7900405, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b010;
+let Inst{31-21} = 0b10101011101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isPredicable = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++$Mu2) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101101101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Nt8),
+"memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 1;
+}
+def S2_storerinewgp : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Nt8),
+"memw(gp+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_12297800, AddrModeRel {
+let Inst{12-11} = 0b10;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def S2_storew_locked : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw_locked($Rs32,$Pd4) = $Rt32",
+ST_tc_ld_SLOT0, TypeST>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100000101;
+let accessSize = WordAccess;
+let isSoloAX = 1;
+let mayStore = 1;
+let isPredicateLate = 1;
+}
+def S2_svsathb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = vsathb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_svsathub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = vsathub($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_tableidxb : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxb_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxb($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxd : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxd_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxd($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxh : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxh_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxh($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxw : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxw_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxw($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_togglebit_i : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = togglebit($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_togglebit_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = togglebit($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_tstbit_i : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Pd4 = tstbit($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000101000;
+}
+def S2_tstbit_r : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = tstbit($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111000;
+}
+def S2_valignib : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii),
+"$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11971407 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000000000;
+}
+def S2_valignrb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4),
+"$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11552785 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010000;
+}
+def S2_vcnegh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vcnegh($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S2_vcrotate : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vcrotate($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S2_vrcnegh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += vrcnegh($Rss32,$Rt32)",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_vrndpackwh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vrndwh($Rss32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_vrndpackwhs : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vrndwh($Rss32):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsathb : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsathb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsathb_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsathb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsathub : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsathub($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsathub_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsathub($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsatwh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsatwh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsatwh_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsatwh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsatwuh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsatwuh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsatwuh_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsatwuh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsplatrb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = vsplatb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vsplatrh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsplath($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000100010;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vspliceib : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii),
+"$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_16730127 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000000100;
+}
+def S2_vsplicerb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4),
+"$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_5178985 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010100;
+}
+def S2_vsxtbh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsxtbh($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vsxthw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsxthw($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vtrunehb : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vtrunehb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_vtrunewh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunewh($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def S2_vtrunohb : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vtrunohb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_vtrunowh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunowh($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def S2_vzxtbh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vzxtbh($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vzxthw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vzxthw($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S4_addaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii),
+"$Rd32 = add($Rs32,add($Ru32,#$Ii))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6495334 {
+let Inst{31-23} = 0b110110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_addi_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = add(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b100;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_addi_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = add(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b100;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_andi_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = and(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b000;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_andi_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = and(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b000;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_clbaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s6_0Imm:$Ii),
+"$Rd32 = add(clb($Rs32),#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_5523416 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10001100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_clbpaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, s6_0Imm:$Ii),
+"$Rd32 = add(clb($Rss32),#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_10188026 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_clbpnorm : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = normamt($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S4_extract : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rd32 = extract($Rs32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_11930928 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b100011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_extract_rp : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rd32 = extract($Rs32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_15472748 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_extractp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
+"$Rdd32 = extract($Rss32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_9894557 {
+let Inst{31-24} = 0b10001010;
+let prefersSlot3 = 1;
+}
+def S4_extractp_rp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = extract($Rss32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let prefersSlot3 = 1;
+}
+def S4_lsli : HInst<
+(outs IntRegs:$Rd32),
+(ins s6_0Imm:$Ii, IntRegs:$Rt32),
+"$Rd32 = lsl(#$Ii,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_518319 {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S4_ntstbit_i : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Pd4 = !tstbit($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000101001;
+}
+def S4_ntstbit_r : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !tstbit($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111001;
+}
+def S4_or_andi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 |= and($Rs32,#$Ii)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6356866 {
+let Inst{31-22} = 0b1101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_or_andix : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii),
+"$Rx32 = or($Ru32,and($Rx32in,#$Ii))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_7504828 {
+let Inst{31-22} = 0b1101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_or_ori : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 |= or($Rs32,#$Ii)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6356866 {
+let Inst{31-22} = 0b1101101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_ori_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = or(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b010;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_ori_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = or(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b010;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_parity : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = parity($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_pstorerbf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerbnewf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b000;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110101101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000110101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_pstorerbnewfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerbnewt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b000;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110100101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000010101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_pstorerbnewtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110110101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerbt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100000;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110000;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerdf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110101110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S4_pstorerdfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerdt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110100110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S4_pstorerdtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110110110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerff_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerff_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerffnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerffnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerffnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerffnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerft_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerft_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100011;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerftnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerftnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010011;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerftnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110011;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerftnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerhf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerhf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerhfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerhfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerhnewf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b001;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110101101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000110101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S4_pstorerhnewfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerhnewt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b001;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110100101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000010101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S4_pstorerhnewtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110110101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerht_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerht_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100010;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerhtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerhtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110010;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerif_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerif_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerifnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerifnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S4_pstorerifnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerifnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerinewf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b010;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110101101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000110101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S4_pstorerinewfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerinewt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b010;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110100101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000010101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S4_pstorerinewtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110110101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerit_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerit_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100100;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstoreritnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstoreritnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010100;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S4_pstoreritnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110100;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstoreritnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_stored_locked : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"memd_locked($Rs32,$Pd4) = $Rtt32",
+ST_tc_ld_SLOT0, TypeST>, Enc_2921694 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100000111;
+let accessSize = DoubleWordAccess;
+let isSoloAX = 1;
+let mayStore = 1;
+let isPredicateLate = 1;
+}
+def S4_storeirb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11282123, PredNewRel {
+let Inst{31-21} = 0b00111100000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def S4_storeirb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, s8_0Imm:$II),
+"memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4.new) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4.new) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111000000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4.new) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111001000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4.new) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirh_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_10282127, PredNewRel {
+let Inst{31-21} = 0b00111100001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def S4_storeirh_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, s8_0Imm:$II),
+"memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirhf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirhf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirhfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirhfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4.new) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirht_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111000001;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirht_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirhtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4.new) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111001001;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirhtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4.new) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeiri_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9282127, PredNewRel {
+let Inst{31-21} = 0b00111100010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def S4_storeiri_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, s8_0Imm:$II),
+"memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirif_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirif_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirifnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4.new) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirifnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4.new) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirit_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111000010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirit_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeiritnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4.new) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111001010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeiritnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4.new) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storerb_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memb($Re32=#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_ap";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerb_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011000;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+let isPredicable = 1;
+}
+def S4_storerb_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memb($Ru32<<#$Ii+#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101000;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storerb_ur";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerbnew_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Nt8),
+"memb($Re32=#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10101011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerb_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_storerbnew_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel {
+let Inst{6-3} = 0b0000;
+let Inst{31-21} = 0b00111011101;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isPredicable = 1;
+let opNewValue = 3;
+}
+def S4_storerbnew_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
+"memb($Ru32<<#$Ii+#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel {
+let Inst{7-7} = 0b1;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101101101;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S4_storerb_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_storerd_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, DoubleRegs:$Rtt32),
+"memd($Re32=#$II) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_8131399 {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S4_storerd_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerd_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9772987, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011110;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+let isPredicable = 1;
+}
+def S4_storerd_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32),
+"memd($Ru32<<#$Ii+#$II) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_12848507, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101110;
+let addrMode = BaseLongOffset;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerf_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Re32=#$II) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246 {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S4_storerf_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerf_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011011;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+let isPredicable = 1;
+}
+def S4_storerf_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Ru32<<#$Ii+#$II) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101011;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S4_storerf_rr";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerh_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Re32=#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_ap";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerh_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011010;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+let isPredicable = 1;
+}
+def S4_storerh_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Ru32<<#$Ii+#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101010;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_ur";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerhnew_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Nt8),
+"memh($Re32=#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-11} = 0b001;
+let Inst{31-21} = 0b10101011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerh_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_storerhnew_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel {
+let Inst{6-3} = 0b0001;
+let Inst{31-21} = 0b00111011101;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isPredicable = 1;
+let opNewValue = 3;
+}
+def S4_storerhnew_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
+"memh($Ru32<<#$Ii+#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel {
+let Inst{7-7} = 0b1;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101101101;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_storeri_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memw($Re32=#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_ap";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeri_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011100;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+let isPredicable = 1;
+}
+def S4_storeri_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memw($Ru32<<#$Ii+#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101100;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_ur";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerinew_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Nt8),
+"memw($Re32=#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-11} = 0b010;
+let Inst{31-21} = 0b10101011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storeri_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_storerinew_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel {
+let Inst{6-3} = 0b0010;
+let Inst{31-21} = 0b00111011101;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isPredicable = 1;
+let opNewValue = 3;
+}
+def S4_storerinew_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
+"memw($Ru32<<#$Ii+#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel {
+let Inst{7-7} = 0b1;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101101101;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_subaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32),
+"$Rd32 = add($Rs32,sub(#$Ii,$Ru32))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6495334 {
+let Inst{31-23} = 0b110110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_subi_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = sub(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b110;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_subi_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b110;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_vrcrotate : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_114098 {
+let Inst{7-6} = 0b11;
+let Inst{31-21} = 0b11000011110;
+let prefersSlot3 = 1;
+}
+def S4_vrcrotate_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rxx32 += vrcrotate($Rss32,$Rt32,#$Ii)",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_13114546 {
+let Inst{7-6} = 0b00;
+let Inst{31-21} = 0b11001011101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S4_vxaddsubh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
+def S4_vxaddsubhr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S4_vxaddsubw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
+def S4_vxsubaddh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
+def S4_vxsubaddhr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S4_vxsubaddw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
+def S5_asrhub_rnd_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rd32 = vasrhub($Rss32,#$Ii):raw",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8038806, Requires<[HasV5T]> {
+let Inst{7-5} = 0b100;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S5_asrhub_rnd_sat_goodsyntax : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Requires<[HasV5T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+}
+def S5_asrhub_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rd32 = vasrhub($Rss32,#$Ii):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8038806, Requires<[HasV5T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S5_popcountp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = popcount($Rss32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000011;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S5_vasrhrnd : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vasrh($Rss32,#$Ii):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000001;
+let prefersSlot3 = 1;
+}
+def S5_vasrhrnd_goodsyntax : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
+S_2op_tc_1_SLOT23, TypeS_2op>, Requires<[HasV5T]> {
+let isPseudo = 1;
+}
+def S6_rol_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000000000;
+}
+def S6_rol_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 ^= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S6_rol_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 ^= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_vsplatrbp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsplatb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV62T]> {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000100010;
+}
+def S6_vtrunehb_ppp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunehb($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157, Requires<[HasV62T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def S6_vtrunohb_ppp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunohb($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157, Requires<[HasV62T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def SA1_addi : HInst<
+(outs GeneralSubRegs:$Rx16),
+(ins IntRegs:$Rx16in, s32_0Imm:$Ii),
+"$Rx16 = add($Rx16in,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_3974695 {
+let Inst{12-11} = 0b00;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 7;
+let opExtentAlign = 0;
+let Constraints = "$Rx16 = $Rx16in";
+}
+def SA1_addrx : HInst<
+(outs GeneralSubRegs:$Rx16),
+(ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16),
+"$Rx16 = add($Rx16in,$Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_6135183 {
+let Inst{12-8} = 0b11000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+let Constraints = "$Rx16 = $Rx16in";
+}
+def SA1_addsp : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u6_2Imm:$Ii),
+"$Rd16 = add(r29,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_176263 {
+let Inst{12-10} = 0b011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_and1 : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = and($Rs16,#1)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrf : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (!p0) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrfnew : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (!p0.new) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPredicatedNew = 1;
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrt : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (p0) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrtnew : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (p0.new) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPredicatedNew = 1;
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_cmpeqi : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_2079016 {
+let Inst{3-2} = 0b00;
+let Inst{12-8} = 0b11001;
+let AsmVariantName = "NonParsable";
+let Defs = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine0i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#0,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b00;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine1i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#1,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b01;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine2i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#2,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b10;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine3i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#3,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b11;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combinerz : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins GeneralSubRegs:$Rs16),
+"$Rdd8 = combine($Rs16,#0)",
+PSEUDO, TypeSUBINSN>, Enc_10501894 {
+let Inst{3-3} = 0b1;
+let Inst{12-8} = 0b11101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combinezr : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins GeneralSubRegs:$Rs16),
+"$Rdd8 = combine(#0,$Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_10501894 {
+let Inst{3-3} = 0b0;
+let Inst{12-8} = 0b11101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_dec : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1),
+"$Rd16 = add($Rs16,#$n1)",
+PSEUDO, TypeSUBINSN>, Enc_10597934 {
+let Inst{12-8} = 0b10011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_inc : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = add($Rs16,#1)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_seti : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u32_0Imm:$Ii),
+"$Rd16 = #$Ii",
+PSEUDO, TypeSUBINSN>, Enc_2176383 {
+let Inst{12-10} = 0b010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def SA1_setin1 : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins n1Const:$n1),
+"$Rd16 = #$n1",
+PSEUDO, TypeSUBINSN>, Enc_13336212 {
+let Inst{12-4} = 0b110100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_sxtb : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = sxtb($Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_sxth : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = sxth($Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_tfr : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = $Rs16",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_zxtb : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = and($Rs16,#255)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_zxth : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = zxth($Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SL1_loadri_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
+"$Rd16 = memw($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_13606251 {
+let Inst{12-12} = 0b0;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L1";
+}
+def SL1_loadrub_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
+"$Rd16 = memub($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15606259 {
+let Inst{12-12} = 0b1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L1";
+}
+def SL2_deallocframe : HInst<
+(outs),
+(ins),
+"deallocframe",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111100000000;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [R30, R29, R31];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31 : HInst<
+(outs),
+(ins),
+"jumpr r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000000;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let Uses = [R31];
+let Defs = [PC];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_f : HInst<
+(outs),
+(ins),
+"if (!p0) jumpr r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_fnew : HInst<
+(outs),
+(ins),
+"if (!p0.new) jumpr:nt r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_t : HInst<
+(outs),
+(ins),
+"if (p0) jumpr r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_tnew : HInst<
+(outs),
+(ins),
+"if (p0.new) jumpr:nt r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadrb_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii),
+"$Rd16 = memb($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_3135259 {
+let Inst{12-11} = 0b10;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadrd_sp : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u5_3Imm:$Ii),
+"$Rdd8 = memd(r29+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_16479122 {
+let Inst{12-8} = 0b11110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadrh_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
+"$Rd16 = memh($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_4135257 {
+let Inst{12-11} = 0b00;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadri_sp : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u5_2Imm:$Ii),
+"$Rd16 = memw(r29+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_64199 {
+let Inst{12-9} = 0b1110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadruh_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
+"$Rd16 = memuh($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_4135257 {
+let Inst{12-11} = 0b01;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return : HInst<
+(outs),
+(ins),
+"dealloc_return",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000000;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R30, R29, R31];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_f : HInst<
+(outs),
+(ins),
+"if (!p0) dealloc_return",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_fnew : HInst<
+(outs),
+(ins),
+"if (!p0.new) dealloc_return:nt",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_t : HInst<
+(outs),
+(ins),
+"if (p0) dealloc_return",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_tnew : HInst<
+(outs),
+(ins),
+"if (p0.new) dealloc_return:nt",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SS1_storeb_io : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16),
+"memb($Rs16+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_13204995 {
+let Inst{12-12} = 0b1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S1";
+}
+def SS1_storew_io : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16),
+"memw($Rs16+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_11205051 {
+let Inst{12-12} = 0b0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S1";
+}
+def SS2_allocframe : HInst<
+(outs),
+(ins u5_3Imm:$Ii),
+"allocframe(#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_7884306 {
+let Inst{3-0} = 0b0000;
+let Inst{12-9} = 0b1110;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let Uses = [R30, R29, R31];
+let Defs = [R30, R29];
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storebi0 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
+"memb($Rs16+#$Ii) = #0",
+PSEUDO, TypeSUBINSN>, Enc_13536408 {
+let Inst{12-8} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storebi1 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
+"memb($Rs16+#$Ii) = #1",
+PSEUDO, TypeSUBINSN>, Enc_13536408 {
+let Inst{12-8} = 0b10011;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_stored_sp : HInst<
+(outs),
+(ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8),
+"memd(r29+#$Ii) = $Rtt8",
+PSEUDO, TypeSUBINSN>, Enc_9165078 {
+let Inst{12-9} = 0b0101;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storeh_io : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16),
+"memh($Rs16+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_1734121 {
+let Inst{12-11} = 0b00;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storew_sp : HInst<
+(outs),
+(ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16),
+"memw(r29+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_6690615 {
+let Inst{12-9} = 0b0100;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storewi0 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
+"memw($Rs16+#$Ii) = #0",
+PSEUDO, TypeSUBINSN>, Enc_15536400 {
+let Inst{12-8} = 0b10000;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storewi1 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
+"memw($Rs16+#$Ii) = #1",
+PSEUDO, TypeSUBINSN>, Enc_15536400 {
+let Inst{12-8} = 0b10001;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def V6_MAP_equb : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equb_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equb_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_ior : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_ior_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equh_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_ior : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_ior_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_ior : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_ior_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_extractw : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rs32),
+"$Rd32 = vextract($Vu32,$Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_16601956, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10010010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_extractw_128B : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rs32),
+"$Rd32 = vextract($Vu32,$Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_16601956, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10010010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_extractw_alt : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rs32),
+"$Rd32.w = vextract($Vu32,$Rs32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_extractw_alt_128B : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rs32),
+"$Rd32.w = vextract($Vu32,$Rs32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_hi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vss32),
+"$Vd32 = hi($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_hi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vss32),
+"$Vd32 = hi($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_ld0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ld0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldnt0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32):nt",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldnt0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32):nt",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldu0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmemu($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldu0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmemu($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lo : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vss32),
+"$Vd32 = lo($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lo_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vss32),
+"$Vd32 = lo($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_lvsplatb : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.b = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lvsplatb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.b = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_lvsplath : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.h = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lvsplath_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.h = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_lvsplatw : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vsplat($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_9768377, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lvsplatw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vsplat($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_9768377, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_and : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = and($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_and_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = and($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_and_n : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = and($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_and_n_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = and($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_not : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4),
+"$Qd4 = not($Qs4)",
+CVI_VA, TypeCVI_VA>, Enc_4897205, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-10} = 0b0000;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_not_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4),
+"$Qd4 = not($Qs4)",
+CVI_VA, TypeCVI_VA>, Enc_4897205, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-10} = 0b0000;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_or : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = or($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_or_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = or($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_or_n : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = or($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_or_n_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = or($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_scalar2 : HInst<
+(outs VecPredRegs:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV60T,UseHVX]> {
+let Inst{13-2} = 0b000000010001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_scalar2_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV60T,UseHVX]> {
+let Inst{13-2} = 0b000000010001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_scalar2v2 : HInst<
+(outs VecPredRegs:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq2($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV62T,UseHVX]> {
+let Inst{13-2} = 0b000000010011;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_scalar2v2_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq2($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV62T,UseHVX]> {
+let Inst{13-2} = 0b000000010011;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_xor : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = xor($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_xor_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = xor($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_shuffeqh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4.b = vshuffe($Qs4.h,$Qt4.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_shuffeqh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4.b = vshuffe($Qs4.h,$Qt4.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_shuffeqw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4.h = vshuffe($Qs4.w,$Qt4.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_shuffeqw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4.h = vshuffe($Qs4.w,$Qt4.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
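+// Note: the V6_st* records that follow appear to be codegen-only pseudo
+// forms of HVX vector stores (vmem/vmemu, plus predicated, new-value and
+// non-temporal variants); they carry isPseudo/isCodeGenOnly and no Inst{}
+// encoding, so they are presumably expanded to real stores later.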
+def V6_st0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Vs32),
+"vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_st0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stn0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Os8),
+"vmem($Rt32) = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stn0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Os8),
+"vmem($Rt32) = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stnnt0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Os8),
+"vmem($Rt32):nt = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stnnt0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Os8),
+"vmem($Rt32):nt = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stnp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnp0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnpnt0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnpnt0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnq0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnq0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnqnt0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnqnt0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnt0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Vs32),
+"vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnt0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stp0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stpnt0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stpnt0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stq0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stq0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stqnt0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stqnt0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stu0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Vs32),
+"vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stu0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stunp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stunp0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stup0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stup0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
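+// Note: the V6_vL32Ub_* / V6_vL32b_* records below carry real encodings for
+// the HVX vector loads: vmemu (unaligned) and vmem (aligned), each in
+// base+immediate (_ai), post-increment immediate (_pi) and post-increment
+// modifier-register (_ppu) addressing modes, with _128B double-vector
+// variants marked isCodeGenOnly.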
+def V6_vL32Ub_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmemu($Rt32+#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32Ub_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmemu($Rt32+#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32Ub_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmemu($Rx32++#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32Ub_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmemu($Rx32++#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32Ub_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmemu($Rx32++$Mu2)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32Ub_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmemu($Rx32++$Mu2)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_cur_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_cur_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_cur_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
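+// Note: the records below appear to be the non-temporal (:nt) counterparts
+// of the vmem loads above, including the .cur and .tmp destination forms
+// and their predicated variants.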
+def V6_vL32b_nt_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_cur_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_cur_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_cur_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_tmp_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_tmp_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_tmp_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_tmp_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_tmp_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_tmp_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_tmp_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_tmp_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_tmp_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_tmp_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_tmp_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_tmp_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32Ub_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32Ub_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai";
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32Ub_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32Ub_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai";
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32Ub_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32Ub_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_new_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_6608821, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 2;
+}
+def V6_vS32b_new_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2152247, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def V6_vS32b_new_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_new_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_new_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001101;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001101;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_12244921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_11244923, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_new_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_new_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nqpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nqpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_new_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_6608821, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 2;
+}
+def V6_vS32b_nt_new_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2152247, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def V6_vS32b_nt_new_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001111;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001111;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_12244921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_11244923, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001010;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001010;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_nqpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_nqpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_qpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_qpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_qpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_qpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vabsdiffh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vabs($Vu32.h)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vabs($Vu32.h)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vabs($Vu32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vabs($Vu32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsh_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsh($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsh($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.w = vabs($Vu32.w)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.w = vabs($Vu32.w)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.w = vabs($Vu32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.w = vabs($Vu32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsw_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsw($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsw($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddb_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddbsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddbsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddcarry : HInst<
+(outs VectorRegs:$Vd32, VecPredRegs:$Qx4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, VecPredRegs:$Qx4in),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vaddcarry_128B : HInst<
+(outs VectorRegs128B:$Vd32, VecPredRegs128B:$Qx4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, VecPredRegs128B:$Qx4in),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vaddclbh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddclbh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddclbw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddclbw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddh_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddhw_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddubh_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddubsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddubsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddububb_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddububb_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vadduh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vadduh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vadduh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vadduh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhw_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vadduw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vadduw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vadduw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vadduw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddw_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_valignb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = valign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_valignb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = valign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_valignbi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = valign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_valignbi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = valign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vand : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vand($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vand_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vand($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandnqrt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4711514, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandnqrt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4711514, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandnqrt_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4944558, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4944558, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandnqrt_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandqrt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4711514, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandqrt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4711514, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandqrt_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4944558, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4944558, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandqrt_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvnqv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vu32),
+"$Vd32 = vand(!$Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvnqv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vu32),
+"$Vd32 = vand(!$Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandvqv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vu32),
+"$Vd32 = vand($Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvqv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vu32),
+"$Vd32 = vand($Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandvrt : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qd4 = vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_11498120, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvrt_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qd4 = vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_11498120, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandvrt_acc : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qx4 |= vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_10612292, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_acc_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qx4 |= vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_10612292, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_acc_alt : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qx4.ub |= vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_acc_alt_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qx4.ub |= vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_alt : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qd4.ub = vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvrt_alt_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qd4.ub = vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasl($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasl($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslhv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vasl($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslhv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vasl($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslhv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaslh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslhv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaslh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslw_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslwv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vasl($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslwv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vasl($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslwv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaslw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslwv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaslw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasr($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasr($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhbrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhbrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhbrndsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrhbsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhbsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhubrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubrndsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhubsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrhv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vasr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vasr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vasrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vasrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasruwuhrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasruwuhrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
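+// vasr with a scalar shift amount: each lane of Vu32 is arithmetically
+// shifted right by Rt32; the _acc ("+=") variants accumulate into the tied
+// Vx32 input operand.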
+def V6_vasrw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrw_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwhrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhrndsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwuhrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwuhrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwuhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwuhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwuhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vasr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vasr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vasrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vasrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
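+// Plain register moves: vassign copies one vector register; vassignp is the
+// pseudo (no encoding) copy of a vector register pair.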
+def V6_vassign : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vassign_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vassignp : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32),
+"$Vdd32 = $Vuu32",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vassignp_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32),
+"$Vdd32 = $Vuu32",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
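+// vavg family: lane-wise average of two vectors for byte/halfword/word,
+// signed and unsigned; the :rnd variants round the average instead of
+// truncating it.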
+def V6_vavgh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavghrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavghrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavghrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavghrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgubrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgubrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgubrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgubrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavguh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguhrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguhrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavguhrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguhrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgwrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgwrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgwrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgwrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
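+// vccombine: predicated form of vcombine; the pair Vdd32 is written only when
+// the scalar predicate Ps4 is true (isPredicated).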
+def V6_vccombine : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vccombine_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
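+// vcl0: count-leading-zeros per unsigned halfword (vcl0h) or word (vcl0w) lane.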
+def V6_vcl0h : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.uh = vcl0($Vu32.uh)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0h_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.uh = vcl0($Vu32.uh)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vcl0h_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vcl0h($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0h_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vcl0h($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0w : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.uw = vcl0($Vu32.uw)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0w_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.uw = vcl0($Vu32.uw)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vcl0w_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vcl0w($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0w_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vcl0w($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
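+// vcmov is a predicated vector copy, vcombine packs Vu:Vv into a vector pair,
+// and vd0 is a codegen-only pseudo that materializes an all-zero vector.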
+def V6_vcmov : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32),
+"if ($Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcmov_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32),
+"if ($Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vcombine : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isRegSequence = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcombine_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isRegSequence = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vd0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins),
+"$Vd32 = #0",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vd0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins),
+"$Vd32 = #0",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
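+// vdeal/vdealb/vdealh/vdealvdd perform "deal" (deinterleave) permutations,
+// and vdelta applies a delta-network permutation controlled by Vv32.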
+def V6_vdeal : HInst<
+(outs VectorRegs:$Vy32, VectorRegs:$Vx32),
+(ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32),
+"vdeal($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vdeal_128B : HInst<
+(outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"vdeal($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vdealb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.b = vdeal($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb4w : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vdeale($Vu32.b,$Vv32.b)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb4w_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vdeale($Vu32.b,$Vv32.b)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdealb4w_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vdealb4w($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb4w_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vdealb4w($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.b = vdeal($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdealb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vdealb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vdealb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vdeal($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vdeal($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdealh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vdealh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vdealh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealvdd : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealvdd_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdelta : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdelta_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
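+// vdmpy ("dual multiply"): multiplies adjacent element pairs of the source by
+// the corresponding bytes/halfwords of Rt32 and reduces each pair into a wider
+// destination lane; _dv forms take a vector pair, _acc forms accumulate, and
+// :sat saturates the result.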
+def V6_vdmpybus : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpybus_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpybus_dv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhb_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhb_dv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhisat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhisat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhisat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhisat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhsat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsuisat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsuisat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhsuisat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsuisat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsusat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsusat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhsusat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsusat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhvsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhvsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhvsat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhvsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdsaduh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdsaduh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdsaduh_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdsaduh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqb : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqb_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_veqb_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_veqh_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_veqw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtb_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtb_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgth_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgth_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtub_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtub_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtuh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtuh_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtuw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtuw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vhist : HInst<
+(outs),
+(ins),
+"vhist",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vhist_128B : HInst<
+(outs),
+(ins),
+"vhist",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vhistq : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vhist($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vhistq_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vhist($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vinsertwr : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, IntRegs:$Rt32),
+"$Vx32.w = vinsert($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_313333, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b100000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vinsertwr_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"$Vx32.w = vinsert($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_313333, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b100000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlalignb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vlalign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlalignb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vlalign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlalignbi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = vlalign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlalignbi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = vlalign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.ub = vlsr($Vu32.ub,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.ub = vlsr($Vu32.ub,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.uh = vlsr($Vu32.uh,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.uh = vlsr($Vu32.uh,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrhv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vlsr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrhv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vlsr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrhv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vlsrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrhv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vlsrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vlsr($Vu32.uw,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vlsr($Vu32.uw,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrwv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vlsr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrwv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vlsr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrwv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vlsrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrwv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vlsrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvvb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvvb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvvb_nm : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvvb_nm_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvvb_oracc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8877260, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvb_oracc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8877260, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvb_oracci : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8280533, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvb_oracci_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8280533, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvbi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvvbi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvwh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvwh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvwh_nm : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvwh_nm_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvwh_oracc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_16213761, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwh_oracc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_16213761, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwh_oracci : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_3457570, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwh_oracci_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_3457570, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwhi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_13261538, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvwhi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_13261538, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
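+// Element-wise vector maximum: signed .h/.w and unsigned .ub/.uh on V60, with
+// the signed-byte form (vmaxb) requiring V62.  As throughout this file, the
+// _128B records are the 128-byte-vector (double HVX mode) twins and the _alt
+// records are codegen-only pseudos carrying the untyped assembler syntax.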
+def V6_vmaxb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vmax($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vmax($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmax($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmax($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmax($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmax($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
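+// Element-wise vector minimum, mirroring the vmax group above: .ub/.uh/.h/.w
+// on V60 plus the signed-byte vminb on V62.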
+def V6_vminb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vmin($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vmin($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmin($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmin($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmin($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmin($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
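+// vmpa: paired multiply-add across a vector-register pair, producing widened
+// .h/.w results from .ub/.h/.uh sources against scalar-register or vector
+// multipliers; the _acc forms accumulate into $Vxx32 and vmpauhb requires V62.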
+def V6_vmpabus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpabus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabusv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabusv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpabusv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vmpabus($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabusv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vmpabus($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabuuv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabuuv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpabuuv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vmpabuu($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabuuv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vmpabuu($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpahb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpahb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpahb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpahb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpauhb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpauhb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpauhb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpauhb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
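+// Widening vector multiplies: vmpybus/vmpyb multiply byte sources into
+// halfword results in a register pair, in vector-by-scalar (Rt32) and
+// vector-by-vector (*v) flavours, each with an accumulating _acc form.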
+def V6_vmpybus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpybus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybusv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybusv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpybusv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybusv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpybv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.h += vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.h += vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
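+// vmpye multiplies word lanes of $Vu32 by the even (low, unsigned) halfwords
+// of $Vv32; the V62 vmpyewuh_64 variant returns the products in a register
+// pair instead of a single vector.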
+def V6_vmpyewuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyewuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyewuh_64 : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyewuh_64_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyewuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyewuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
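+// vmpyh: halfword multiplies producing word results in a register pair or,
+// with the :<<1:sat and :<<1:rnd:sat modifiers, saturating halfword results;
+// includes vector-by-scalar, vector-by-vector (vmpyhv), and mixed-sign
+// (vmpyhus) forms, each with accumulating _acc variants.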
+def V6_vmpyh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpy($Vu32.h,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpy($Vu32.h,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhsat_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsat_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsat_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsat_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsrs : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhsrs_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhsrs_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhsrs_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhss : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhss_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhss_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhss_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhvsrs : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhvsrs_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhvsrs_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhvsrs_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyieoh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyieoh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiewh_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewh_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewh_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyiewh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewh_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyiewh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiewuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiewuh_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiewuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyih : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyih_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyih_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.h += vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.h += vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyih_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyihb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyihb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyihb_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyihb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiowh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyio($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiowh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyio($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiowh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyiowh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiowh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyiowh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiwb_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiwh_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiwub_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_10058269, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_10058269, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyowh_64_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyo($Vu32.w,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyowh_64_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyo($Vu32.w,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyowh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyowh_rnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd_sacc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_rnd_sacc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_rnd_sacc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:rnd:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_rnd_sacc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:rnd:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyub : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyub_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyub_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyub_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyubv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyubv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyubv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyubv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyuh_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuhv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuhv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyuhv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuhv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmux : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qt4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmux($Qt4,$Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_1572239, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmux_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qt4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmux($Qt4,$Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_1572239, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnavgh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vnavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vnavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnavgh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vnavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vnavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnavgub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vnavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vnavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vnavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vnavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnavgw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vnavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vnavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
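+// Editorial note (assumption): V6_vnccombine and V6_vncmov below are the
+// predicated-false variants (isPredicated/isPredicatedFalse), executing
+// only when the scalar predicate is clear, i.e. "if (!$Ps4) ...".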
+def V6_vnccombine : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnccombine_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vncmov : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32),
+"if (!$Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vncmov_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32),
+"if (!$Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnormamth : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vnormamt($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamth_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vnormamt($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnormamth_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vnormamth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamth_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vnormamth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamtw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.w = vnormamt($Vu32.w)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamtw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.w = vnormamt($Vu32.w)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnormamtw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vnormamtw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamtw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vnormamtw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnot : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vnot($Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnot_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vnot($Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vor : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vor_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackeb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vpacke($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vpacke($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackeb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vpacke($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vpacke($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackeh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhb_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhb_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackhb_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhb_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhub_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhub_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackhub_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhub_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackob : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vpacko($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackob_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vpacko($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackob_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackob_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackoh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vpacko($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackoh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vpacko($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackoh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackoh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwh_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwh_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackwh_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwh_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwuh_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwuh_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackwuh_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwuh_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpopcounth : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vpopcount($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpopcounth_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vpopcount($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpopcounth_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vpopcounth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpopcounth_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vpopcounth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrdelta : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrdelta_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
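+// vrmpy* are reducing multiplies: byte products are summed into each word lane
+// of the destination. The _acc variants accumulate ("+=", isAccumulator) and
+// tie the output to its input through the Constraints string; the vector/scalar
+// forms reuse the four bytes held in Rt32 against every lane.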
+def V6_vrmpybus : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybus_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybus_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybus_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
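+// The *i forms below read a vector register pair (VecDblRegs) and a one-bit
+// immediate (u1_0Imm:$Ii), producing a pair result; their accumulating
+// variants tie $Vxx32 to $Vxx32in.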
+def V6_vrmpybusi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybusi_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusi_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybusv_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybv_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpyub_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpyubi_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubi_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpyubv_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
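+// vror rotates the whole vector right by the byte count in Rt32.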
+def V6_vror : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vror($Vu32,$Rt32)",
+CVI_VP, TypeCVI_VP>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vror_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vror($Vu32,$Rt32)",
+CVI_VP, TypeCVI_VP>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
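+// vround* narrow with rounding and saturation; the unsigned-source variants
+// vrounduhub and vrounduwuh are gated on HasV62T rather than HasV60T.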
+def V6_vroundhb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundhb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundhub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduhub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduhub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrounduhub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrounduhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduhub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrounduhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduwuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduwuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrounduwuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrounduwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduwuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrounduwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundwuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
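+// vrsadubi is the reducing sum-of-absolute-differences over unsigned bytes; it
+// shares the pair-source, one-bit-immediate and accumulating shapes of
+// vrmpyubi above.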
+def V6_vrsadubi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrsadubi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrsadubi_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrsadubi_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
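+// vsat* saturate and pack elements from two source vectors down to the
+// narrower type; vsatuwuh is the HasV62T addition for unsigned word sources.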
+def V6_vsathub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsathub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsathub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsathub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsathub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsathub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatuwuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatuwuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsatuwuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsatuwuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatuwuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsatuwuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsatwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsatwh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatwh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsatwh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
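+// vsb and vsh sign-extend each byte/halfword element into a vector register
+// pair (VecDblRegs destination).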
+def V6_vsb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.h = vsxt($Vu32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.h = vsxt($Vu32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vsxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vsxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.w = vsxt($Vu32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.w = vsxt($Vu32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vsxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vsxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufeh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vshuffe($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufeh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vshuffe($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufeh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufeh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuff : HInst<
+(outs VectorRegs:$Vy32, VectorRegs:$Vx32),
+(ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32),
+"vshuff($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vshuff_128B : HInst<
+(outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"vshuff($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vshuffb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.b = vshuff($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.b = vshuff($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vshuffb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vshuffb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffeb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vshuffe($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffeb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vshuffe($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffeb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffeb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vshuff($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vshuff($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vshuffh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vshuffh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffob : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vshuffo($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffob_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vshuffo($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffob_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffob_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffvdd : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffvdd_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoeb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoeb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vshuffoeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vshuffoeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoeh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vshuffoeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vshuffoeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vshuffo($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vshuffo($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubb_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubbsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubbsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubcarry : HInst<
+(outs VectorRegs:$Vd32, VecPredRegs:$Qx4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, VecPredRegs:$Qx4in),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vsubcarry_128B : HInst<
+(outs VectorRegs128B:$Vd32, VecPredRegs128B:$Qx4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, VecPredRegs128B:$Qx4in),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vsubh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubh_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vsub($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vsub($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsububh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vsubub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vsubub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsububsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsububsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubububb_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubububb_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubuh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubuh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vsubuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vsubuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubuw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubuw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubuw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubuw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubw_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vswap : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecPredRegs:$Qt4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vswap($Qt4,$Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_11424254, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vswap_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecPredRegs128B:$Qt4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vswap($Qt4,$Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_11424254, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpyb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpyb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpybus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpybus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpybus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpybus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyhb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyhb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpyhb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyhb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtran2x2_map : HInst<
+(outs VectorRegs:$Vy32, VectorRegs:$Vx32),
+(ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32),
+"vtrans2x2($Vy32,$Vx32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vtran2x2_map_128B : HInst<
+(outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"vtrans2x2($Vy32,$Vx32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vunpackb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.h = vunpack($Vu32.b)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.h = vunpack($Vu32.b)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.w = vunpack($Vu32.h)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.w = vunpack($Vu32.h)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackob : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32.h |= vunpacko($Vu32.b)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackob_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32.h |= vunpacko($Vu32.b)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackob_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32 |= vunpackob($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackob_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32 |= vunpackob($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32.w |= vunpacko($Vu32.h)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32.w |= vunpacko($Vu32.h)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32 |= vunpackoh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32 |= vunpackoh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackub : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uh = vunpack($Vu32.ub)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackub_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uh = vunpack($Vu32.ub)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackub_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackub($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackub_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackub($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackuh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uw = vunpack($Vu32.uh)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackuh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uw = vunpack($Vu32.uh)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackuh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackuh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackuh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackuh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128 : HInst<
+(outs),
+(ins),
+"vwhist128",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128_128B : HInst<
+(outs),
+(ins),
+"vwhist128",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist128m : HInst<
+(outs),
+(ins u1_0Imm:$Ii),
+"vwhist128(#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_1291652, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128m_128B : HInst<
+(outs),
+(ins u1_0Imm:$Ii),
+"vwhist128(#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_1291652, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist128q : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vwhist128($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128q_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vwhist128($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist128qm : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, u1_0Imm:$Ii),
+"vwhist128($Qv4,#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_7978128, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128qm_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, u1_0Imm:$Ii),
+"vwhist128($Qv4,#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_7978128, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256 : HInst<
+(outs),
+(ins),
+"vwhist256",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256_128B : HInst<
+(outs),
+(ins),
+"vwhist256",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256_sat : HInst<
+(outs),
+(ins),
+"vwhist256:sat",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256_sat_128B : HInst<
+(outs),
+(ins),
+"vwhist256:sat",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256q : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vwhist256($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256q_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vwhist256($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256q_sat : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vwhist256($Qv4):sat",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256q_sat_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vwhist256($Qv4):sat",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vxor : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vxor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vxor_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vxor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vzb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uh = vzxt($Vu32.ub)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uh = vzxt($Vu32.ub)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vzb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vzxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vzxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uw = vzxt($Vu32.uh)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uw = vzxt($Vu32.uh)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vzh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vzxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vzxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def Y2_barrier : HInst<
+(outs),
+(ins),
+"barrier",
+ST_tc_3stall_SLOT0, TypeST>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b1010100000000000;
+let isSoloAX = 1;
+let hasSideEffects = 1;
+}
+def Y2_break : HInst<
+(outs),
+(ins),
+"brkpt",
+CR_tc_3x_SLOT3, TypeCR>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b0110110000100000;
+let isSolo = 1;
+}
+def Y2_dccleana : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dccleana($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000000;
+let isSoloAin1 = 1;
+}
+def Y2_dccleaninva : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dccleaninva($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000010;
+let isSoloAin1 = 1;
+}
+def Y2_dcfetch : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dcfetch($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasSideEffects = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def Y2_dcfetchbo : HInst<
+(outs),
+(ins IntRegs:$Rs32, u11_3Imm:$Ii),
+"dcfetch($Rs32+#$Ii)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_4983213 {
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10010100000;
+let addrMode = BaseImmOffset;
+let hasSideEffects = 1;
+}
+def Y2_dcinva : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dcinva($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000001;
+let isSoloAin1 = 1;
+}
+def Y2_dczeroa : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dczeroa($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000110;
+let mayStore = 1;
+let isSoloAin1 = 1;
+}
+def Y2_icinva : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"icinva($Rs32)",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010110110;
+let isSolo = 1;
+}
+def Y2_isync : HInst<
+(outs),
+(ins),
+"isync",
+J_tc_2early_SLOT2, TypeJ>, Enc_0 {
+let Inst{13-0} = 0b00000000000010;
+let Inst{31-16} = 0b0101011111000000;
+let isSolo = 1;
+}
+def Y2_syncht : HInst<
+(outs),
+(ins),
+"syncht",
+ST_tc_ld_SLOT0, TypeST>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b1010100001000000;
+let isSolo = 1;
+}
+def Y4_l2fetch : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"l2fetch($Rs32,$Rt32)",
+ST_tc_3stall_SLOT0, TypeST>, Enc_14620934 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100110000;
+let isSoloAX = 1;
+let mayStore = 1;
+let hasSideEffects = 1;
+}
+def Y4_trace : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"trace($Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01100010010;
+let isSoloAX = 1;
+}
+def Y5_l2fetch : HInst<
+(outs),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"l2fetch($Rs32,$Rtt32)",
+ST_tc_3stall_SLOT0, TypeST>, Enc_8943121, Requires<[HasV5T]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100110100;
+let isSoloAX = 1;
+let mayStore = 1;
+let hasSideEffects = 1;
+}
+def dep_A2_addsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add($Rs32,$Rt32):sat:deprecated",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def dep_A2_subsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32,$Rs32):sat:deprecated",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def dep_S2_packhl : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = packhl($Rs32,$Rt32):deprecated",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010100000;
+}
diff --git a/lib/Target/Hexagon/HexagonDepMappings.td b/lib/Target/Hexagon/HexagonDepMappings.td
new file mode 100644
index 000000000000..77a56a9adf10
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepMappings.td
@@ -0,0 +1,654 @@
+//===--- HexagonDepMappings.td --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def A2_negAlias : InstAlias<"$Rd32=neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>;
+def A2_notAlias : InstAlias<"$Rd32=not($Rs32)", (A2_subri IntRegs:$Rd32, -1, IntRegs:$Rs32)>;
+def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32=$Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_tfrfnewAlias : InstAlias<"if (!$Pu4.new) $Rd32=$Rs32", (A2_paddifnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_tfrtAlias : InstAlias<"if ($Pu4) $Rd32=$Rs32", (A2_paddit IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_tfrtnewAlias : InstAlias<"if ($Pu4.new) $Rd32=$Rs32", (A2_padditnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_vaddb_mapAlias : InstAlias<"$Rdd32=vaddb($Rss32,$Rtt32)", (A2_vaddub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>;
+def A2_vsubb_mapAlias : InstAlias<"$Rdd32=vsubb($Rss32,$Rtt32)", (A2_vsubub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>;
+def A2_zxtbAlias : InstAlias<"$Rd32=zxtb($Rs32)", (A2_andir IntRegs:$Rd32, IntRegs:$Rs32, 255)>;
+def C2_cmpltAlias : InstAlias<"$Pd4=cmp.lt($Rs32,$Rt32)", (C2_cmpgt PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>;
+def C2_cmpltuAlias : InstAlias<"$Pd4=cmp.ltu($Rs32,$Rt32)", (C2_cmpgtu PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>;
+def C2_pxfer_mapAlias : InstAlias<"$Pd4=$Ps4", (C2_or PredRegs:$Pd4, PredRegs:$Ps4, PredRegs:$Ps4)>;
+def J2_jumpf_nopred_mapAlias : InstAlias<"if (!$Pu4) jump $Ii", (J2_jumpf PredRegs:$Pu4, b30_2Imm:$Ii)>;
+def J2_jumprf_nopred_mapAlias : InstAlias<"if (!$Pu4) jumpr $Rs32", (J2_jumprf PredRegs:$Pu4, IntRegs:$Rs32)>;
+def J2_jumprt_nopred_mapAlias : InstAlias<"if ($Pu4) jumpr $Rs32", (J2_jumprt PredRegs:$Pu4, IntRegs:$Rs32)>;
+def J2_jumpt_nopred_mapAlias : InstAlias<"if ($Pu4) jump $Ii", (J2_jumpt PredRegs:$Pu4, b30_2Imm:$Ii)>;
+def L2_loadalignb_zomapAlias : InstAlias<"$Ryy32=memb_fifo($Rs32)", (L2_loadalignb_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>;
+def L2_loadalignh_zomapAlias : InstAlias<"$Ryy32=memh_fifo($Rs32)", (L2_loadalignh_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>;
+def L2_loadbsw2_zomapAlias : InstAlias<"$Rd32=membh($Rs32)", (L2_loadbsw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadbsw4_zomapAlias : InstAlias<"$Rdd32=membh($Rs32)", (L2_loadbsw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>;
+def L2_loadbzw2_zomapAlias : InstAlias<"$Rd32=memubh($Rs32)", (L2_loadbzw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadbzw4_zomapAlias : InstAlias<"$Rdd32=memubh($Rs32)", (L2_loadbzw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>;
+def L2_loadrb_zomapAlias : InstAlias<"$Rd32=memb($Rs32)", (L2_loadrb_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadrd_zomapAlias : InstAlias<"$Rdd32=memd($Rs32)", (L2_loadrd_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>;
+def L2_loadrh_zomapAlias : InstAlias<"$Rd32=memh($Rs32)", (L2_loadrh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadri_zomapAlias : InstAlias<"$Rd32=memw($Rs32)", (L2_loadri_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadrub_zomapAlias : InstAlias<"$Rd32=memub($Rs32)", (L2_loadrub_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadruh_zomapAlias : InstAlias<"$Rd32=memuh($Rs32)", (L2_loadruh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_ploadrbf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memb($Rs32)", (L2_ploadrbf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrbfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memb($Rs32)", (L2_ploadrbfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrbt_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memb($Rs32)", (L2_ploadrbt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrbtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memb($Rs32)", (L2_ploadrbtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdf_zomapAlias : InstAlias<"if (!$Pt4) $Rdd32=memd($Rs32)", (L2_ploadrdf_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rdd32=memd($Rs32)", (L2_ploadrdfnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdt_zomapAlias : InstAlias<"if ($Pt4) $Rdd32=memd($Rs32)", (L2_ploadrdt_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rdd32=memd($Rs32)", (L2_ploadrdtnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memh($Rs32)", (L2_ploadrhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memh($Rs32)", (L2_ploadrhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrht_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memh($Rs32)", (L2_ploadrht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memh($Rs32)", (L2_ploadrhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrif_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memw($Rs32)", (L2_ploadrif_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrifnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memw($Rs32)", (L2_ploadrifnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrit_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memw($Rs32)", (L2_ploadrit_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadritnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memw($Rs32)", (L2_ploadritnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memub($Rs32)", (L2_ploadrubf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memub($Rs32)", (L2_ploadrubfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubt_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memub($Rs32)", (L2_ploadrubt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memub($Rs32)", (L2_ploadrubtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memuh($Rs32)", (L2_ploadruhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memuh($Rs32)", (L2_ploadruhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruht_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memuh($Rs32)", (L2_ploadruht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memuh($Rs32)", (L2_ploadruhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L4_add_memopb_zomapAlias : InstAlias<"memb($Rs32)+=$Rt32", (L4_add_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_add_memoph_zomapAlias : InstAlias<"memh($Rs32)+=$Rt32", (L4_add_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_add_memopw_zomapAlias : InstAlias<"memw($Rs32)+=$Rt32", (L4_add_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_and_memopb_zomapAlias : InstAlias<"memb($Rs32)&=$Rt32", (L4_and_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_and_memoph_zomapAlias : InstAlias<"memh($Rs32)&=$Rt32", (L4_and_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_and_memopw_zomapAlias : InstAlias<"memw($Rs32)&=$Rt32", (L4_and_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_iadd_memopb_zomapAlias : InstAlias<"memb($Rs32)+=#$II", (L4_iadd_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iadd_memoph_zomapAlias : InstAlias<"memh($Rs32)+=#$II", (L4_iadd_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iadd_memopw_zomapAlias : InstAlias<"memw($Rs32)+=#$II", (L4_iadd_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iand_memopb_zomapAlias : InstAlias<"memb($Rs32)=clrbit(#$II)", (L4_iand_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iand_memoph_zomapAlias : InstAlias<"memh($Rs32)=clrbit(#$II)", (L4_iand_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iand_memopw_zomapAlias : InstAlias<"memw($Rs32)=clrbit(#$II)", (L4_iand_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_ior_memopb_zomapAlias : InstAlias<"memb($Rs32)=setbit(#$II)", (L4_ior_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_ior_memoph_zomapAlias : InstAlias<"memh($Rs32)=setbit(#$II)", (L4_ior_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_ior_memopw_zomapAlias : InstAlias<"memw($Rs32)=setbit(#$II)", (L4_ior_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_isub_memopb_zomapAlias : InstAlias<"memb($Rs32)-=#$II", (L4_isub_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_isub_memoph_zomapAlias : InstAlias<"memh($Rs32)-=#$II", (L4_isub_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_isub_memopw_zomapAlias : InstAlias<"memw($Rs32)-=#$II", (L4_isub_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_or_memopb_zomapAlias : InstAlias<"memb($Rs32)|=$Rt32", (L4_or_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_or_memoph_zomapAlias : InstAlias<"memh($Rs32)|=$Rt32", (L4_or_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_or_memopw_zomapAlias : InstAlias<"memw($Rs32)|=$Rt32", (L4_or_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_sub_memopb_zomapAlias : InstAlias<"memb($Rs32)-=$Rt32", (L4_sub_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_sub_memoph_zomapAlias : InstAlias<"memh($Rs32)-=$Rt32", (L4_sub_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_sub_memopw_zomapAlias : InstAlias<"memw($Rs32)-=$Rt32", (L4_sub_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def M2_mpyuiAlias : InstAlias<"$Rd32=mpyui($Rs32,$Rt32)", (M2_mpyi IntRegs:$Rd32, IntRegs:$Rs32, IntRegs:$Rt32)>;
+def S2_pstorerbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=$Rt32", (S2_pstorerbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerbnewf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=$Nt8.new", (S2_pstorerbnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerbnewt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=$Nt8.new", (S2_pstorerbnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=$Rt32", (S2_pstorerbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerdf_zomapAlias : InstAlias<"if (!$Pv4) memd($Rs32)=$Rtt32", (S2_pstorerdf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S2_pstorerdt_zomapAlias : InstAlias<"if ($Pv4) memd($Rs32)=$Rtt32", (S2_pstorerdt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S2_pstorerff_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Rt32.h", (S2_pstorerff_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerft_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Rt32.h", (S2_pstorerft_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Rt32", (S2_pstorerhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerhnewf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Nt8.new", (S2_pstorerhnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerhnewt_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Nt8.new", (S2_pstorerhnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Rt32", (S2_pstorerht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=$Rt32", (S2_pstorerif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerinewf_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=$Nt8.new", (S2_pstorerinewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerinewt_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=$Nt8.new", (S2_pstorerinewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=$Rt32", (S2_pstorerit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerb_zomapAlias : InstAlias<"memb($Rs32)=$Rt32", (S2_storerb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerbnew_zomapAlias : InstAlias<"memb($Rs32)=$Nt8.new", (S2_storerbnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_storerd_zomapAlias : InstAlias<"memd($Rs32)=$Rtt32", (S2_storerd_io IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S2_storerf_zomapAlias : InstAlias<"memh($Rs32)=$Rt32.h", (S2_storerf_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerh_zomapAlias : InstAlias<"memh($Rs32)=$Rt32", (S2_storerh_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerhnew_zomapAlias : InstAlias<"memh($Rs32)=$Nt8.new", (S2_storerhnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_storeri_zomapAlias : InstAlias<"memw($Rs32)=$Rt32", (S2_storeri_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerinew_zomapAlias : InstAlias<"memw($Rs32)=$Nt8.new", (S2_storerinew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_tableidxb_goodsyntaxAlias : InstAlias<"$Rx32=tableidxb($Rs32,#$Ii,#$II)", (S2_tableidxb IntRegs:$Rx32, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II)>;
+def S4_pstorerbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=$Rt32", (S4_pstorerbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerbnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=$Nt8.new", (S4_pstorerbnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerbnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=$Nt8.new", (S4_pstorerbnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=$Rt32", (S4_pstorerbtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerdfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memd($Rs32)=$Rtt32", (S4_pstorerdfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S4_pstorerdtnew_zomapAlias : InstAlias<"if ($Pv4.new) memd($Rs32)=$Rtt32", (S4_pstorerdtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S4_pstorerffnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Rt32.h", (S4_pstorerffnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerftnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Rt32.h", (S4_pstorerftnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Rt32", (S4_pstorerhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerhnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Nt8.new", (S4_pstorerhnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerhnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Nt8.new", (S4_pstorerhnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Rt32", (S4_pstorerhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=$Rt32", (S4_pstorerifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerinewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=$Nt8.new", (S4_pstorerinewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerinewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=$Nt8.new", (S4_pstorerinewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstoreritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=$Rt32", (S4_pstoreritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_storeirb_zomapAlias : InstAlias<"memb($Rs32)=#$II", (S4_storeirb_io IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=#$II", (S4_storeirbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=#$II", (S4_storeirbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=#$II", (S4_storeirbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=#$II", (S4_storeirbtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirh_zomapAlias : InstAlias<"memh($Rs32)=#$II", (S4_storeirh_io IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=#$II", (S4_storeirhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=#$II", (S4_storeirhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=#$II", (S4_storeirht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=#$II", (S4_storeirhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeiri_zomapAlias : InstAlias<"memw($Rs32)=#$II", (S4_storeiri_io IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=#$II", (S4_storeirif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=#$II", (S4_storeirifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=#$II", (S4_storeirit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeiritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=#$II", (S4_storeiritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def V6_MAP_equbAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_128BAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_and_128BAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_ior_128BAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_xor_128BAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equhAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_128BAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_and_128BAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_ior_128BAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_xor_128BAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equwAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_128BAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_and_128BAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_ior_128BAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_xor_128BAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_extractw_altAlias : InstAlias<"$Rd32.w=vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, VectorRegs:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>;
+def V6_extractw_alt_128BAlias : InstAlias<"$Rd32.w=vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, VectorRegs:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>;
+def V6_ld0Alias : InstAlias<"$Vd32=vmem($Rt32)", (V6_vL32b_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ld0_128BAlias : InstAlias<"$Vd32=vmem($Rt32)", (V6_vL32b_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldnt0Alias : InstAlias<"$Vd32=vmem($Rt32):nt", (V6_vL32b_nt_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldnt0_128BAlias : InstAlias<"$Vd32=vmem($Rt32):nt", (V6_vL32b_nt_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldu0Alias : InstAlias<"$Vd32=vmemu($Rt32)", (V6_vL32Ub_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldu0_128BAlias : InstAlias<"$Vd32=vmemu($Rt32)", (V6_vL32Ub_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_st0Alias : InstAlias<"vmem($Rt32)=$Vs32", (V6_vS32b_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_st0_128BAlias : InstAlias<"vmem($Rt32)=$Vs32", (V6_vS32b_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stn0Alias : InstAlias<"vmem($Rt32)=$Os8.new", (V6_vS32b_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stn0_128BAlias : InstAlias<"vmem($Rt32)=$Os8.new", (V6_vS32b_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stnnt0Alias : InstAlias<"vmem($Rt32):nt=$Os8.new", (V6_vS32b_nt_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stnnt0_128BAlias : InstAlias<"vmem($Rt32):nt=$Os8.new", (V6_vS32b_nt_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stnp0Alias : InstAlias<"if (!$Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnp0_128BAlias : InstAlias<"if (!$Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnpnt0Alias : InstAlias<"if (!$Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnpnt0_128BAlias : InstAlias<"if (!$Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnq0Alias : InstAlias<"if (!$Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnq0_128BAlias : InstAlias<"if (!$Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnqnt0Alias : InstAlias<"if (!$Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnqnt0_128BAlias : InstAlias<"if (!$Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnt0Alias : InstAlias<"vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnt0_128BAlias : InstAlias<"vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stp0Alias : InstAlias<"if ($Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stp0_128BAlias : InstAlias<"if ($Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stpnt0Alias : InstAlias<"if ($Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stpnt0_128BAlias : InstAlias<"if ($Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stq0Alias : InstAlias<"if ($Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stq0_128BAlias : InstAlias<"if ($Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stqnt0Alias : InstAlias<"if ($Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stqnt0_128BAlias : InstAlias<"if ($Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stu0Alias : InstAlias<"vmemu($Rt32)=$Vs32", (V6_vS32Ub_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stu0_128BAlias : InstAlias<"vmemu($Rt32)=$Vs32", (V6_vS32Ub_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stunp0Alias : InstAlias<"if (!$Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stunp0_128BAlias : InstAlias<"if (!$Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stup0Alias : InstAlias<"if ($Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stup0_128BAlias : InstAlias<"if ($Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_vabsdiffh_altAlias : InstAlias<"$Vd32=vabsdiffh($Vu32,$Vv32)", (V6_vabsdiffh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffh_alt_128BAlias : InstAlias<"$Vd32=vabsdiffh($Vu32,$Vv32)", (V6_vabsdiffh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffub_altAlias : InstAlias<"$Vd32=vabsdiffub($Vu32,$Vv32)", (V6_vabsdiffub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffub_alt_128BAlias : InstAlias<"$Vd32=vabsdiffub($Vu32,$Vv32)", (V6_vabsdiffub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffuh_altAlias : InstAlias<"$Vd32=vabsdiffuh($Vu32,$Vv32)", (V6_vabsdiffuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffuh_alt_128BAlias : InstAlias<"$Vd32=vabsdiffuh($Vu32,$Vv32)", (V6_vabsdiffuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffw_altAlias : InstAlias<"$Vd32=vabsdiffw($Vu32,$Vv32)", (V6_vabsdiffw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffw_alt_128BAlias : InstAlias<"$Vd32=vabsdiffw($Vu32,$Vv32)", (V6_vabsdiffw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsh_altAlias : InstAlias<"$Vd32=vabsh($Vu32)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsh_alt_128BAlias : InstAlias<"$Vd32=vabsh($Vu32)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsh_sat_altAlias : InstAlias<"$Vd32=vabsh($Vu32):sat", (V6_vabsh_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsh_sat_alt_128BAlias : InstAlias<"$Vd32=vabsh($Vu32):sat", (V6_vabsh_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuh_altAlias : InstAlias<"$Vd32.uh=vabs($Vu32.h)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuh_alt_128BAlias : InstAlias<"$Vd32.uh=vabs($Vu32.h)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuw_altAlias : InstAlias<"$Vd32.uw=vabs($Vu32.w)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuw_alt_128BAlias : InstAlias<"$Vd32.uw=vabs($Vu32.w)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_altAlias : InstAlias<"$Vd32=vabsw($Vu32)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_alt_128BAlias : InstAlias<"$Vd32=vabsw($Vu32)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_sat_altAlias : InstAlias<"$Vd32=vabsw($Vu32):sat", (V6_vabsw_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_sat_alt_128BAlias : InstAlias<"$Vd32=vabsw($Vu32):sat", (V6_vabsw_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddb_altAlias : InstAlias<"$Vd32=vaddb($Vu32,$Vv32)", (V6_vaddb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddb_alt_128BAlias : InstAlias<"$Vd32=vaddb($Vu32,$Vv32)", (V6_vaddb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddb_dv_altAlias : InstAlias<"$Vdd32=vaddb($Vuu32,$Vvv32)", (V6_vaddb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddb_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddb($Vuu32,$Vvv32)", (V6_vaddb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddbnq_alt_128BAlias : InstAlias<"if (!$Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddbq_alt_128BAlias : InstAlias<"if ($Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddh_altAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32)", (V6_vaddh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddh_alt_128BAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32)", (V6_vaddh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddh_dv_altAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32)", (V6_vaddh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddh_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32)", (V6_vaddh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhnq_alt_128BAlias : InstAlias<"if (!$Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhq_alt_128BAlias : InstAlias<"if ($Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_altAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32):sat", (V6_vaddhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_alt_128BAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32):sat", (V6_vaddhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_dv_altAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32):sat", (V6_vaddhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32):sat", (V6_vaddhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddhw_altAlias : InstAlias<"$Vdd32=vaddh($Vu32,$Vv32)", (V6_vaddhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddhw_alt_128BAlias : InstAlias<"$Vdd32=vaddh($Vu32,$Vv32)", (V6_vaddhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubh_altAlias : InstAlias<"$Vdd32=vaddub($Vu32,$Vv32)", (V6_vaddubh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubh_alt_128BAlias : InstAlias<"$Vdd32=vaddub($Vu32,$Vv32)", (V6_vaddubh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_altAlias : InstAlias<"$Vd32=vaddub($Vu32,$Vv32):sat", (V6_vaddubsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_alt_128BAlias : InstAlias<"$Vd32=vaddub($Vu32,$Vv32):sat", (V6_vaddubsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_dv_altAlias : InstAlias<"$Vdd32=vaddub($Vuu32,$Vvv32):sat", (V6_vaddubsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddub($Vuu32,$Vvv32):sat", (V6_vaddubsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_altAlias : InstAlias<"$Vd32=vadduh($Vu32,$Vv32):sat", (V6_vadduhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_alt_128BAlias : InstAlias<"$Vd32=vadduh($Vu32,$Vv32):sat", (V6_vadduhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_dv_altAlias : InstAlias<"$Vdd32=vadduh($Vuu32,$Vvv32):sat", (V6_vadduhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vadduh($Vuu32,$Vvv32):sat", (V6_vadduhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vadduhw_altAlias : InstAlias<"$Vdd32=vadduh($Vu32,$Vv32)", (V6_vadduhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vadduhw_alt_128BAlias : InstAlias<"$Vdd32=vadduh($Vu32,$Vv32)", (V6_vadduhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddw_altAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32)", (V6_vaddw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddw_alt_128BAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32)", (V6_vaddw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddw_dv_altAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32)", (V6_vaddw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddw_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32)", (V6_vaddw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwnq_alt_128BAlias : InstAlias<"if (!$Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwq_alt_128BAlias : InstAlias<"if ($Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_altAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32):sat", (V6_vaddwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_alt_128BAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32):sat", (V6_vaddwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_dv_altAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32):sat", (V6_vaddwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32):sat", (V6_vaddwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vandqrt_acc_altAlias : InstAlias<"$Vx32.ub|=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt_acc VectorRegs:$Vx32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandqrt_acc_alt_128BAlias : InstAlias<"$Vx32.ub|=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt_acc VectorRegs:$Vx32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandqrt_altAlias : InstAlias<"$Vd32.ub=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt VectorRegs:$Vd32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandqrt_alt_128BAlias : InstAlias<"$Vd32.ub=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt VectorRegs:$Vd32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_acc_altAlias : InstAlias<"$Qx4.ub|=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt_acc VecPredRegs:$Qx4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_acc_alt_128BAlias : InstAlias<"$Qx4.ub|=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt_acc VecPredRegs:$Qx4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_altAlias : InstAlias<"$Qd4.ub=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt VecPredRegs:$Qd4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_alt_128BAlias : InstAlias<"$Qd4.ub=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt VecPredRegs:$Qd4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslh_altAlias : InstAlias<"$Vd32=vaslh($Vu32,$Rt32)", (V6_vaslh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslh_alt_128BAlias : InstAlias<"$Vd32=vaslh($Vu32,$Rt32)", (V6_vaslh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslhv_altAlias : InstAlias<"$Vd32=vaslh($Vu32,$Vv32)", (V6_vaslhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaslhv_alt_128BAlias : InstAlias<"$Vd32=vaslh($Vu32,$Vv32)", (V6_vaslhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaslw_acc_altAlias : InstAlias<"$Vx32+=vaslw($Vu32,$Rt32)", (V6_vaslw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslw_acc_alt_128BAlias : InstAlias<"$Vx32+=vaslw($Vu32,$Rt32)", (V6_vaslw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslw_altAlias : InstAlias<"$Vd32=vaslw($Vu32,$Rt32)", (V6_vaslw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslw_alt_128BAlias : InstAlias<"$Vd32=vaslw($Vu32,$Rt32)", (V6_vaslw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslwv_altAlias : InstAlias<"$Vd32=vaslw($Vu32,$Vv32)", (V6_vaslwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaslwv_alt_128BAlias : InstAlias<"$Vd32=vaslw($Vu32,$Vv32)", (V6_vaslwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrh_altAlias : InstAlias<"$Vd32=vasrh($Vu32,$Rt32)", (V6_vasrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrh_alt_128BAlias : InstAlias<"$Vd32=vasrh($Vu32,$Rt32)", (V6_vasrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32=vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrhubrndsat_altAlias : InstAlias<"$Vd32=vasrhub($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhubrndsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrhubsat_altAlias : InstAlias<"$Vd32=vasrhub($Vu32,$Vv32,$Rt8):sat", (V6_vasrhubsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrhv_altAlias : InstAlias<"$Vd32=vasrh($Vu32,$Vv32)", (V6_vasrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrhv_alt_128BAlias : InstAlias<"$Vd32=vasrh($Vu32,$Vv32)", (V6_vasrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrw_acc_altAlias : InstAlias<"$Vx32+=vasrw($Vu32,$Rt32)", (V6_vasrw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrw_acc_alt_128BAlias : InstAlias<"$Vx32+=vasrw($Vu32,$Rt32)", (V6_vasrw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrw_altAlias : InstAlias<"$Vd32=vasrw($Vu32,$Rt32)", (V6_vasrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrw_alt_128BAlias : InstAlias<"$Vd32=vasrw($Vu32,$Rt32)", (V6_vasrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrwh_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8)", (V6_vasrwhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwhrndsat_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrwhrndsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwhsat_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwuhsat_altAlias : InstAlias<"$Vd32=vasrwuh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwuhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwv_altAlias : InstAlias<"$Vd32=vasrw($Vu32,$Vv32)", (V6_vasrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrwv_alt_128BAlias : InstAlias<"$Vd32=vasrw($Vu32,$Vv32)", (V6_vasrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgh_altAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32)", (V6_vavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgh_alt_128BAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32)", (V6_vavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavghrnd_altAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32):rnd", (V6_vavghrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavghrnd_alt_128BAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32):rnd", (V6_vavghrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgub_altAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32)", (V6_vavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgub_alt_128BAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32)", (V6_vavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgubrnd_altAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32):rnd", (V6_vavgubrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgubrnd_alt_128BAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32):rnd", (V6_vavgubrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguh_altAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32)", (V6_vavguh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguh_alt_128BAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32)", (V6_vavguh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguhrnd_altAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32):rnd", (V6_vavguhrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguhrnd_alt_128BAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32):rnd", (V6_vavguhrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgw_altAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32)", (V6_vavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgw_alt_128BAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32)", (V6_vavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgwrnd_altAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32):rnd", (V6_vavgwrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgwrnd_alt_128BAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32):rnd", (V6_vavgwrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vcl0h_altAlias : InstAlias<"$Vd32=vcl0h($Vu32)", (V6_vcl0h VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vcl0h_alt_128BAlias : InstAlias<"$Vd32=vcl0h($Vu32)", (V6_vcl0h VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vcl0w_altAlias : InstAlias<"$Vd32=vcl0w($Vu32)", (V6_vcl0w VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vcl0w_alt_128BAlias : InstAlias<"$Vd32=vcl0w($Vu32)", (V6_vcl0w VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vd0Alias : InstAlias<"$Vd32=#0", (V6_vxor VectorRegs:$Vd32, VectorRegs:$Vd32, VectorRegs:$Vd32)>, Requires<[UseHVX]>;
+def V6_vd0_128BAlias : InstAlias<"$Vd32=#0", (V6_vxor VectorRegs:$Vd32, VectorRegs:$Vd32, VectorRegs:$Vd32)>, Requires<[UseHVX]>;
+def V6_vdd0Alias : InstAlias<"$Vdd32=#0", (V6_vsubw_dv VecDblRegs:$Vdd32, W15, W15)>, Requires<[UseHVX]>;
+def V6_vdd0_128BAlias : InstAlias<"$Vdd32=#0", (V6_vsubw_dv VecDblRegs:$Vdd32, W15, W15)>, Requires<[UseHVX]>;
+def V6_vdealb4w_altAlias : InstAlias<"$Vd32=vdealb4w($Vu32,$Vv32)", (V6_vdealb4w VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdealb4w_alt_128BAlias : InstAlias<"$Vd32=vdealb4w($Vu32,$Vv32)", (V6_vdealb4w VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdealb_altAlias : InstAlias<"$Vd32=vdealb($Vu32)", (V6_vdealb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdealb_alt_128BAlias : InstAlias<"$Vd32=vdealb($Vu32)", (V6_vdealb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdealh_altAlias : InstAlias<"$Vd32=vdealh($Vu32)", (V6_vdealh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdealh_alt_128BAlias : InstAlias<"$Vd32=vdealh($Vu32)", (V6_vdealh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_acc_altAlias : InstAlias<"$Vx32+=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_altAlias : InstAlias<"$Vd32=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_alt_128BAlias : InstAlias<"$Vd32=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_acc_altAlias : InstAlias<"$Vxx32+=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_altAlias : InstAlias<"$Vdd32=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_alt_128BAlias : InstAlias<"$Vdd32=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_acc_altAlias : InstAlias<"$Vx32+=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_altAlias : InstAlias<"$Vd32=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_alt_128BAlias : InstAlias<"$Vd32=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_acc_altAlias : InstAlias<"$Vxx32+=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_altAlias : InstAlias<"$Vdd32=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_alt_128BAlias : InstAlias<"$Vdd32=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_altAlias : InstAlias<"$Vd32=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_alt_128BAlias : InstAlias<"$Vd32=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_altAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_alt_128BAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_acc_altAlias : InstAlias<"$Vx32+=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_altAlias : InstAlias<"$Vd32=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_alt_128BAlias : InstAlias<"$Vd32=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_acc_altAlias : InstAlias<"$Vx32+=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_altAlias : InstAlias<"$Vd32=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_alt_128BAlias : InstAlias<"$Vd32=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_altAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_alt_128BAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_acc_altAlias : InstAlias<"$Vxx32+=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_acc_alt_128BAlias : InstAlias<"$Vxx32+=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_altAlias : InstAlias<"$Vdd32=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_alt_128BAlias : InstAlias<"$Vdd32=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrh_altAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Rt32)", (V6_vlsrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrh_alt_128BAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Rt32)", (V6_vlsrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrhv_altAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Vv32)", (V6_vlsrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vlsrhv_alt_128BAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Vv32)", (V6_vlsrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vlsrw_altAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Rt32)", (V6_vlsrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrw_alt_128BAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Rt32)", (V6_vlsrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrwv_altAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Vv32)", (V6_vlsrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vlsrwv_alt_128BAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Vv32)", (V6_vlsrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxh_altAlias : InstAlias<"$Vd32=vmaxh($Vu32,$Vv32)", (V6_vmaxh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxh_alt_128BAlias : InstAlias<"$Vd32=vmaxh($Vu32,$Vv32)", (V6_vmaxh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxub_altAlias : InstAlias<"$Vd32=vmaxub($Vu32,$Vv32)", (V6_vmaxub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxub_alt_128BAlias : InstAlias<"$Vd32=vmaxub($Vu32,$Vv32)", (V6_vmaxub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxuh_altAlias : InstAlias<"$Vd32=vmaxuh($Vu32,$Vv32)", (V6_vmaxuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxuh_alt_128BAlias : InstAlias<"$Vd32=vmaxuh($Vu32,$Vv32)", (V6_vmaxuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxw_altAlias : InstAlias<"$Vd32=vmaxw($Vu32,$Vv32)", (V6_vmaxw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxw_alt_128BAlias : InstAlias<"$Vd32=vmaxw($Vu32,$Vv32)", (V6_vmaxw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminh_altAlias : InstAlias<"$Vd32=vminh($Vu32,$Vv32)", (V6_vminh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminh_alt_128BAlias : InstAlias<"$Vd32=vminh($Vu32,$Vv32)", (V6_vminh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminub_altAlias : InstAlias<"$Vd32=vminub($Vu32,$Vv32)", (V6_vminub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminub_alt_128BAlias : InstAlias<"$Vd32=vminub($Vu32,$Vv32)", (V6_vminub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminuh_altAlias : InstAlias<"$Vd32=vminuh($Vu32,$Vv32)", (V6_vminuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminuh_alt_128BAlias : InstAlias<"$Vd32=vminuh($Vu32,$Vv32)", (V6_vminuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminw_altAlias : InstAlias<"$Vd32=vminw($Vu32,$Vv32)", (V6_vminw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminw_alt_128BAlias : InstAlias<"$Vd32=vminw($Vu32,$Vv32)", (V6_vminw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpabus_acc_altAlias : InstAlias<"$Vxx32+=vmpabus($Vuu32,$Rt32)", (V6_vmpabus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpabus($Vuu32,$Rt32)", (V6_vmpabus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabus_altAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Rt32)", (V6_vmpabus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabus_alt_128BAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Rt32)", (V6_vmpabus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabusv_altAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Vvv32)", (V6_vmpabusv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpabusv_alt_128BAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Vvv32)", (V6_vmpabusv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpabuuv_altAlias : InstAlias<"$Vdd32=vmpabuu($Vuu32,$Vvv32)", (V6_vmpabuuv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpabuuv_alt_128BAlias : InstAlias<"$Vdd32=vmpabuu($Vuu32,$Vvv32)", (V6_vmpabuuv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpahb_acc_altAlias : InstAlias<"$Vxx32+=vmpahb($Vuu32,$Rt32)", (V6_vmpahb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpahb_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpahb($Vuu32,$Rt32)", (V6_vmpahb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpahb_altAlias : InstAlias<"$Vdd32=vmpahb($Vuu32,$Rt32)", (V6_vmpahb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpahb_alt_128BAlias : InstAlias<"$Vdd32=vmpahb($Vuu32,$Rt32)", (V6_vmpahb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_acc_altAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Rt32)", (V6_vmpybus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Rt32)", (V6_vmpybus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_altAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Rt32)", (V6_vmpybus VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_alt_128BAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Rt32)", (V6_vmpybus VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_acc_altAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Vv32)", (V6_vmpybusv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Vv32)", (V6_vmpybusv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_altAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Vv32)", (V6_vmpybusv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_alt_128BAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Vv32)", (V6_vmpybusv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_acc_altAlias : InstAlias<"$Vxx32+=vmpyb($Vu32,$Vv32)", (V6_vmpybv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyb($Vu32,$Vv32)", (V6_vmpybv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_altAlias : InstAlias<"$Vdd32=vmpyb($Vu32,$Vv32)", (V6_vmpybv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_alt_128BAlias : InstAlias<"$Vdd32=vmpyb($Vu32,$Vv32)", (V6_vmpybv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyewuh_altAlias : InstAlias<"$Vd32=vmpyewuh($Vu32,$Vv32)", (V6_vmpyewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyewuh_alt_128BAlias : InstAlias<"$Vd32=vmpyewuh($Vu32,$Vv32)", (V6_vmpyewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyh_altAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Rt32)", (V6_vmpyh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyh_alt_128BAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Rt32)", (V6_vmpyh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsat_acc_altAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Rt32):sat", (V6_vmpyhsat_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsat_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Rt32):sat", (V6_vmpyhsat_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsrs_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:rnd:sat", (V6_vmpyhsrs VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsrs_alt_128BAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:rnd:sat", (V6_vmpyhsrs VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhss_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:sat", (V6_vmpyhss VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhss_alt_128BAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:sat", (V6_vmpyhss VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_acc_altAlias : InstAlias<"$Vxx32+=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_altAlias : InstAlias<"$Vdd32=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_alt_128BAlias : InstAlias<"$Vdd32=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_acc_altAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Vv32)", (V6_vmpyhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Vv32)", (V6_vmpyhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_altAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Vv32)", (V6_vmpyhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_alt_128BAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Vv32)", (V6_vmpyhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhvsrs_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyhvsrs VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhvsrs_alt_128BAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyhvsrs VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewh_acc_altAlias : InstAlias<"$Vx32+=vmpyiewh($Vu32,$Vv32)", (V6_vmpyiewh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewh_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiewh($Vu32,$Vv32)", (V6_vmpyiewh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_acc_altAlias : InstAlias<"$Vx32+=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_altAlias : InstAlias<"$Vd32=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_alt_128BAlias : InstAlias<"$Vd32=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_acc_altAlias : InstAlias<"$Vx32+=vmpyih($Vu32,$Vv32)", (V6_vmpyih_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyih($Vu32,$Vv32)", (V6_vmpyih_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_altAlias : InstAlias<"$Vd32=vmpyih($Vu32,$Vv32)", (V6_vmpyih VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_alt_128BAlias : InstAlias<"$Vd32=vmpyih($Vu32,$Vv32)", (V6_vmpyih VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_acc_altAlias : InstAlias<"$Vx32+=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_altAlias : InstAlias<"$Vd32=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_alt_128BAlias : InstAlias<"$Vd32=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiowh_altAlias : InstAlias<"$Vd32=vmpyiowh($Vu32,$Vv32)", (V6_vmpyiowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiowh_alt_128BAlias : InstAlias<"$Vd32=vmpyiowh($Vu32,$Vv32)", (V6_vmpyiowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_acc_altAlias : InstAlias<"$Vx32+=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_altAlias : InstAlias<"$Vd32=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_alt_128BAlias : InstAlias<"$Vd32=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_acc_altAlias : InstAlias<"$Vx32+=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_altAlias : InstAlias<"$Vd32=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_alt_128BAlias : InstAlias<"$Vd32=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_altAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:sat", (V6_vmpyowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_alt_128BAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:sat", (V6_vmpyowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_rnd_altAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyowh_rnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_rnd_alt_128BAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyowh_rnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyub_acc_altAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Rt32)", (V6_vmpyub_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyub_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Rt32)", (V6_vmpyub_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyub_altAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Rt32)", (V6_vmpyub VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyub_alt_128BAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Rt32)", (V6_vmpyub VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_acc_altAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Vv32)", (V6_vmpyubv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Vv32)", (V6_vmpyubv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_altAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Vv32)", (V6_vmpyubv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_alt_128BAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Vv32)", (V6_vmpyubv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_acc_altAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_altAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_alt_128BAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_acc_altAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_altAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_alt_128BAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgh_altAlias : InstAlias<"$Vd32=vnavgh($Vu32,$Vv32)", (V6_vnavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgh_alt_128BAlias : InstAlias<"$Vd32=vnavgh($Vu32,$Vv32)", (V6_vnavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgub_altAlias : InstAlias<"$Vd32=vnavgub($Vu32,$Vv32)", (V6_vnavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgub_alt_128BAlias : InstAlias<"$Vd32=vnavgub($Vu32,$Vv32)", (V6_vnavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgw_altAlias : InstAlias<"$Vd32=vnavgw($Vu32,$Vv32)", (V6_vnavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgw_alt_128BAlias : InstAlias<"$Vd32=vnavgw($Vu32,$Vv32)", (V6_vnavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnormamth_altAlias : InstAlias<"$Vd32=vnormamth($Vu32)", (V6_vnormamth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vnormamth_alt_128BAlias : InstAlias<"$Vd32=vnormamth($Vu32)", (V6_vnormamth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vnormamtw_altAlias : InstAlias<"$Vd32=vnormamtw($Vu32)", (V6_vnormamtw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vnormamtw_alt_128BAlias : InstAlias<"$Vd32=vnormamtw($Vu32)", (V6_vnormamtw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vpackeb_altAlias : InstAlias<"$Vd32=vpackeb($Vu32,$Vv32)", (V6_vpackeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackeb_alt_128BAlias : InstAlias<"$Vd32=vpackeb($Vu32,$Vv32)", (V6_vpackeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackeh_altAlias : InstAlias<"$Vd32=vpackeh($Vu32,$Vv32)", (V6_vpackeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackeh_alt_128BAlias : InstAlias<"$Vd32=vpackeh($Vu32,$Vv32)", (V6_vpackeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhb_sat_altAlias : InstAlias<"$Vd32=vpackhb($Vu32,$Vv32):sat", (V6_vpackhb_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhb_sat_alt_128BAlias : InstAlias<"$Vd32=vpackhb($Vu32,$Vv32):sat", (V6_vpackhb_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhub_sat_altAlias : InstAlias<"$Vd32=vpackhub($Vu32,$Vv32):sat", (V6_vpackhub_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhub_sat_alt_128BAlias : InstAlias<"$Vd32=vpackhub($Vu32,$Vv32):sat", (V6_vpackhub_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackob_altAlias : InstAlias<"$Vd32=vpackob($Vu32,$Vv32)", (V6_vpackob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackob_alt_128BAlias : InstAlias<"$Vd32=vpackob($Vu32,$Vv32)", (V6_vpackob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackoh_altAlias : InstAlias<"$Vd32=vpackoh($Vu32,$Vv32)", (V6_vpackoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackoh_alt_128BAlias : InstAlias<"$Vd32=vpackoh($Vu32,$Vv32)", (V6_vpackoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwh_sat_altAlias : InstAlias<"$Vd32=vpackwh($Vu32,$Vv32):sat", (V6_vpackwh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwh_sat_alt_128BAlias : InstAlias<"$Vd32=vpackwh($Vu32,$Vv32):sat", (V6_vpackwh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwuh_sat_altAlias : InstAlias<"$Vd32=vpackwuh($Vu32,$Vv32):sat", (V6_vpackwuh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwuh_sat_alt_128BAlias : InstAlias<"$Vd32=vpackwuh($Vu32,$Vv32):sat", (V6_vpackwuh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpopcounth_altAlias : InstAlias<"$Vd32=vpopcounth($Vu32)", (V6_vpopcounth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vpopcounth_alt_128BAlias : InstAlias<"$Vd32=vpopcounth($Vu32)", (V6_vpopcounth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_acc_altAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_altAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_alt_128BAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_acc_altAlias : InstAlias<"$Vxx32+=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_acc_alt_128BAlias : InstAlias<"$Vxx32+=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_altAlias : InstAlias<"$Vdd32=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_alt_128BAlias : InstAlias<"$Vdd32=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_acc_altAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_altAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_alt_128BAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_acc_altAlias : InstAlias<"$Vx32+=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_altAlias : InstAlias<"$Vd32=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_alt_128BAlias : InstAlias<"$Vd32=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_acc_altAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_altAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_alt_128BAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_acc_altAlias : InstAlias<"$Vxx32+=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_acc_alt_128BAlias : InstAlias<"$Vxx32+=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_altAlias : InstAlias<"$Vdd32=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_alt_128BAlias : InstAlias<"$Vdd32=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_acc_altAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_altAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_alt_128BAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhb_altAlias : InstAlias<"$Vd32=vroundhb($Vu32,$Vv32):sat", (V6_vroundhb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhb_alt_128BAlias : InstAlias<"$Vd32=vroundhb($Vu32,$Vv32):sat", (V6_vroundhb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhub_altAlias : InstAlias<"$Vd32=vroundhub($Vu32,$Vv32):sat", (V6_vroundhub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhub_alt_128BAlias : InstAlias<"$Vd32=vroundhub($Vu32,$Vv32):sat", (V6_vroundhub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwh_altAlias : InstAlias<"$Vd32=vroundwh($Vu32,$Vv32):sat", (V6_vroundwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwh_alt_128BAlias : InstAlias<"$Vd32=vroundwh($Vu32,$Vv32):sat", (V6_vroundwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwuh_altAlias : InstAlias<"$Vd32=vroundwuh($Vu32,$Vv32):sat", (V6_vroundwuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwuh_alt_128BAlias : InstAlias<"$Vd32=vroundwuh($Vu32,$Vv32):sat", (V6_vroundwuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrsadubi_acc_altAlias : InstAlias<"$Vxx32+=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrsadubi_acc_alt_128BAlias : InstAlias<"$Vxx32+=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrsadubi_altAlias : InstAlias<"$Vdd32=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrsadubi_alt_128BAlias : InstAlias<"$Vdd32=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vsathub_altAlias : InstAlias<"$Vd32=vsathub($Vu32,$Vv32)", (V6_vsathub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsathub_alt_128BAlias : InstAlias<"$Vd32=vsathub($Vu32,$Vv32)", (V6_vsathub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsatwh_altAlias : InstAlias<"$Vd32=vsatwh($Vu32,$Vv32)", (V6_vsatwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsatwh_alt_128BAlias : InstAlias<"$Vd32=vsatwh($Vu32,$Vv32)", (V6_vsatwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsb_altAlias : InstAlias<"$Vdd32=vsxtb($Vu32)", (V6_vsb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsb_alt_128BAlias : InstAlias<"$Vdd32=vsxtb($Vu32)", (V6_vsb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsh_altAlias : InstAlias<"$Vdd32=vsxth($Vu32)", (V6_vsh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsh_alt_128BAlias : InstAlias<"$Vdd32=vsxth($Vu32)", (V6_vsh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshufeh_altAlias : InstAlias<"$Vd32=vshuffeh($Vu32,$Vv32)", (V6_vshufeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufeh_alt_128BAlias : InstAlias<"$Vd32=vshuffeh($Vu32,$Vv32)", (V6_vshufeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffb_altAlias : InstAlias<"$Vd32=vshuffb($Vu32)", (V6_vshuffb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffb_alt_128BAlias : InstAlias<"$Vd32=vshuffb($Vu32)", (V6_vshuffb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffeb_altAlias : InstAlias<"$Vd32=vshuffeb($Vu32,$Vv32)", (V6_vshuffeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffeb_alt_128BAlias : InstAlias<"$Vd32=vshuffeb($Vu32,$Vv32)", (V6_vshuffeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffh_altAlias : InstAlias<"$Vd32=vshuffh($Vu32)", (V6_vshuffh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffh_alt_128BAlias : InstAlias<"$Vd32=vshuffh($Vu32)", (V6_vshuffh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffob_altAlias : InstAlias<"$Vd32=vshuffob($Vu32,$Vv32)", (V6_vshuffob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffob_alt_128BAlias : InstAlias<"$Vd32=vshuffob($Vu32,$Vv32)", (V6_vshuffob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeb_altAlias : InstAlias<"$Vdd32=vshuffoeb($Vu32,$Vv32)", (V6_vshufoeb VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeb_alt_128BAlias : InstAlias<"$Vdd32=vshuffoeb($Vu32,$Vv32)", (V6_vshufoeb VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeh_altAlias : InstAlias<"$Vdd32=vshuffoeh($Vu32,$Vv32)", (V6_vshufoeh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeh_alt_128BAlias : InstAlias<"$Vdd32=vshuffoeh($Vu32,$Vv32)", (V6_vshufoeh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoh_altAlias : InstAlias<"$Vd32=vshuffoh($Vu32,$Vv32)", (V6_vshufoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoh_alt_128BAlias : InstAlias<"$Vd32=vshuffoh($Vu32,$Vv32)", (V6_vshufoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubb_altAlias : InstAlias<"$Vd32=vsubb($Vu32,$Vv32)", (V6_vsubb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubb_alt_128BAlias : InstAlias<"$Vd32=vsubb($Vu32,$Vv32)", (V6_vsubb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubb_dv_altAlias : InstAlias<"$Vdd32=vsubb($Vuu32,$Vvv32)", (V6_vsubb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubb_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubb($Vuu32,$Vvv32)", (V6_vsubb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubbnq_alt_128BAlias : InstAlias<"if (!$Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubbq_alt_128BAlias : InstAlias<"if ($Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubh_altAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32)", (V6_vsubh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubh_alt_128BAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32)", (V6_vsubh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubh_dv_altAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32)", (V6_vsubh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubh_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32)", (V6_vsubh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhnq_alt_128BAlias : InstAlias<"if (!$Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhq_alt_128BAlias : InstAlias<"if ($Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_altAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32):sat", (V6_vsubhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_alt_128BAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32):sat", (V6_vsubhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_dv_altAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32):sat", (V6_vsubhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32):sat", (V6_vsubhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubhw_altAlias : InstAlias<"$Vdd32=vsubh($Vu32,$Vv32)", (V6_vsubhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubhw_alt_128BAlias : InstAlias<"$Vdd32=vsubh($Vu32,$Vv32)", (V6_vsubhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububh_altAlias : InstAlias<"$Vdd32=vsubub($Vu32,$Vv32)", (V6_vsububh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububh_alt_128BAlias : InstAlias<"$Vdd32=vsubub($Vu32,$Vv32)", (V6_vsububh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_altAlias : InstAlias<"$Vd32=vsubub($Vu32,$Vv32):sat", (V6_vsububsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_alt_128BAlias : InstAlias<"$Vd32=vsubub($Vu32,$Vv32):sat", (V6_vsububsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_dv_altAlias : InstAlias<"$Vdd32=vsubub($Vuu32,$Vvv32):sat", (V6_vsububsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubub($Vuu32,$Vvv32):sat", (V6_vsububsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_altAlias : InstAlias<"$Vd32=vsubuh($Vu32,$Vv32):sat", (V6_vsubuhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_alt_128BAlias : InstAlias<"$Vd32=vsubuh($Vu32,$Vv32):sat", (V6_vsubuhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_dv_altAlias : InstAlias<"$Vdd32=vsubuh($Vuu32,$Vvv32):sat", (V6_vsubuhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubuh($Vuu32,$Vvv32):sat", (V6_vsubuhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubuhw_altAlias : InstAlias<"$Vdd32=vsubuh($Vu32,$Vv32)", (V6_vsubuhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubuhw_alt_128BAlias : InstAlias<"$Vdd32=vsubuh($Vu32,$Vv32)", (V6_vsubuhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubw_altAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32)", (V6_vsubw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubw_alt_128BAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32)", (V6_vsubw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubw_dv_altAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32)", (V6_vsubw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubw_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32)", (V6_vsubw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwnq_alt_128BAlias : InstAlias<"if (!$Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwq_alt_128BAlias : InstAlias<"if ($Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_altAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32):sat", (V6_vsubwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_alt_128BAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32):sat", (V6_vsubwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_dv_altAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32):sat", (V6_vsubwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32):sat", (V6_vsubwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_acc_altAlias : InstAlias<"$Vxx32+=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_acc_alt_128BAlias : InstAlias<"$Vxx32+=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_altAlias : InstAlias<"$Vdd32=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_alt_128BAlias : InstAlias<"$Vdd32=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_acc_altAlias : InstAlias<"$Vxx32+=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_altAlias : InstAlias<"$Vdd32=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_alt_128BAlias : InstAlias<"$Vdd32=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_acc_altAlias : InstAlias<"$Vxx32+=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_acc_alt_128BAlias : InstAlias<"$Vxx32+=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_altAlias : InstAlias<"$Vdd32=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_alt_128BAlias : InstAlias<"$Vdd32=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtran2x2_mapAlias : InstAlias<"vtrans2x2($Vy32,$Vx32,$Rt32)", (V6_vshuff VectorRegs:$Vy32, VectorRegs:$Vx32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtran2x2_map_128BAlias : InstAlias<"vtrans2x2($Vy32,$Vx32,$Rt32)", (V6_vshuff VectorRegs:$Vy32, VectorRegs:$Vx32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vunpackb_altAlias : InstAlias<"$Vdd32=vunpackb($Vu32)", (V6_vunpackb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackb_alt_128BAlias : InstAlias<"$Vdd32=vunpackb($Vu32)", (V6_vunpackb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackh_altAlias : InstAlias<"$Vdd32=vunpackh($Vu32)", (V6_vunpackh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackh_alt_128BAlias : InstAlias<"$Vdd32=vunpackh($Vu32)", (V6_vunpackh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackoh_altAlias : InstAlias<"$Vxx32|=vunpackoh($Vu32)", (V6_vunpackoh VecDblRegs:$Vxx32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackoh_alt_128BAlias : InstAlias<"$Vxx32|=vunpackoh($Vu32)", (V6_vunpackoh VecDblRegs:$Vxx32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackub_altAlias : InstAlias<"$Vdd32=vunpackub($Vu32)", (V6_vunpackub VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackub_alt_128BAlias : InstAlias<"$Vdd32=vunpackub($Vu32)", (V6_vunpackub VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackuh_altAlias : InstAlias<"$Vdd32=vunpackuh($Vu32)", (V6_vunpackuh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackuh_alt_128BAlias : InstAlias<"$Vdd32=vunpackuh($Vu32)", (V6_vunpackuh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzb_altAlias : InstAlias<"$Vdd32=vzxtb($Vu32)", (V6_vzb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzb_alt_128BAlias : InstAlias<"$Vdd32=vzxtb($Vu32)", (V6_vzb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzh_altAlias : InstAlias<"$Vdd32=vzxth($Vu32)", (V6_vzh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzh_alt_128BAlias : InstAlias<"$Vdd32=vzxth($Vu32)", (V6_vzh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>;
diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td
new file mode 100644
index 000000000000..0e83b2678732
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepOperands.td
@@ -0,0 +1,132 @@
+//===--- HexagonDepOperands.td --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
+def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
+def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
+def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
+def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
+def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
+def s10_6ImmOperand : AsmOperandClass { let Name = "s10_6Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s10_6Imm : Operand<i32> { let ParserMatchClass = s10_6ImmOperand; let DecoderMethod = "s10_6ImmDecoder"; }
+def s10_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 6>(N->getSExtValue());}]>;
+def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
+def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
+def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
+def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
+def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
+def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
+def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
+def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
+def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
+def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
+def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
+def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
+def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
+def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
+def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
+def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
+def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
+def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
+def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
+def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
+def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
+def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
+def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
+def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
+def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
+def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
+def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
+def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
+def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
+def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
+def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
+def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
+def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
+def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
+def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
+def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
+def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
+def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
+def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
+def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
+def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
+def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
+def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
+def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
+def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
+def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
+def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
+def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
+def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
+def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
+def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
+def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
+def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
+def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
+def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
+def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
+def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
+def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
+def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
+def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
+def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
+def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
+def s10_0ImmOperand : AsmOperandClass { let Name = "s10_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s10_0Imm : Operand<i32> { let ParserMatchClass = s10_0ImmOperand; let DecoderMethod = "s10_0ImmDecoder"; }
+def s10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 0>(N->getSExtValue());}]>;
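Each operand above follows the same three-part pattern: an AsmOperandClass for the assembler, an Operand with a decoder method, and a PatLeaf whose predicate calls isShiftedInt<N, S> or isShiftedUInt<N, S>. The sketch below is a minimal, self-contained illustration of what those predicates accept, assuming only their documented meaning (an N-bit value scaled by 2^S), not the exact MathExtras.h implementation.

#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the isShiftedInt/isShiftedUInt predicates:
// the value must be a multiple of 2^S and, once the S low zero bits are
// stripped, fit in an N-bit signed (or unsigned) field.
template <unsigned N, unsigned S> bool isShiftedIntSketch(int64_t X) {
  if (X % (INT64_C(1) << S) != 0)
    return false;
  int64_t V = X >> S;
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}

template <unsigned N, unsigned S> bool isShiftedUIntSketch(uint64_t X) {
  if (X % (UINT64_C(1) << S) != 0)
    return false;
  return (X >> S) < (UINT64_C(1) << N);
}

int main() {
  assert(isShiftedIntSketch<4, 2>(28));     // s4_2Imm: 28 == 7 << 2
  assert(!isShiftedIntSketch<4, 2>(30));    // low bits not clear
  assert(isShiftedUIntSketch<6, 1>(126));   // u6_1Imm: 63 << 1
  assert(!isShiftedUIntSketch<6, 1>(128));  // 64 does not fit in 6 bits
  return 0;
}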
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index a5351cd08da5..67af947e089d 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -105,6 +105,8 @@ namespace {
cl::init(false), cl::desc("Enable branch probability info"));
cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
cl::desc("Size limit in Hexagon early if-conversion"));
+ cl::opt<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false),
+ cl::Hidden, cl::desc("Do not convert branches that may exit the loop"));
struct PrintMB {
PrintMB(const MachineBasicBlock *B) : MB(B) {}
@@ -142,8 +144,8 @@ namespace {
raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
<< ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
- << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
- << PrintMB(P.FP.FalseB)
+ << ", TrueB:" << PrintMB(P.FP.TrueB)
+ << ", FalseB:" << PrintMB(P.FP.FalseB)
<< ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
return OS;
}
@@ -187,7 +189,8 @@ namespace {
bool usesUndefVReg(const MachineInstr *MI) const;
bool isValid(const FlowPattern &FP) const;
unsigned countPredicateDefs(const MachineBasicBlock *B) const;
- unsigned computePhiCost(MachineBasicBlock *B) const;
+ unsigned computePhiCost(const MachineBasicBlock *B,
+ const FlowPattern &FP) const;
bool isProfitable(const FlowPattern &FP) const;
bool isPredicableStore(const MachineInstr *MI) const;
bool isSafeToSpeculate(const MachineInstr *MI) const;
@@ -199,6 +202,9 @@ namespace {
MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
unsigned PredR, bool IfTrue);
+ unsigned buildMux(MachineBasicBlock *B, MachineBasicBlock::iterator At,
+ const TargetRegisterClass *DRC, unsigned PredR, unsigned TR,
+ unsigned TSR, unsigned FR, unsigned FSR);
void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
void convert(const FlowPattern &FP);
@@ -230,7 +236,7 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
return false;
MachineBasicBlock *SB = *B->succ_begin();
MachineLoop *L = MLI->getLoopFor(SB);
- return L && SB == L->getHeader();
+ return L && SB == L->getHeader() && MDT->dominates(B, SB);
}
bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
@@ -264,9 +270,6 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// mark as diamond with both sides equal?
return false;
}
- // Loop could be null for both.
- if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L)
- return false;
// Record the true/false blocks in such a way that "true" means "if (PredR)",
// and "false" means "if (!PredR)".
@@ -289,8 +292,14 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// it has a single successor. In fact, the block has to end either with
// an unconditional branch (which can be predicated), or with a fall-
// through.
- bool TOk = (TNP == 1) && (TNS == 1);
- bool FOk = (FNP == 1) && (FNS == 1);
+ // Also, skip blocks that do not belong to the same loop.
+ bool TOk = (TNP == 1 && TNS == 1 && MLI->getLoopFor(TB) == L);
+ bool FOk = (FNP == 1 && FNS == 1 && MLI->getLoopFor(FB) == L);
+
+ // If requested (via an option), do not consider branches where the
+ // true and false targets do not belong to the same loop.
+ if (SkipExitBranches && MLI->getLoopFor(TB) != MLI->getLoopFor(FB))
+ return false;
// If neither is predicable, there is nothing interesting.
if (!TOk && !FOk)
@@ -307,17 +316,15 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// Diamond: "if (P) then TB; else FB;".
} else {
// TOk && !FOk
- if (TSB == FB) {
+ if (TSB == FB)
JB = FB;
- FB = nullptr;
- }
+ FB = nullptr;
}
} else {
// !TOk && FOk (at least one must be true by now).
- if (FSB == TB) {
+ if (FSB == TB)
JB = TB;
- TB = nullptr;
- }
+ TB = nullptr;
}
// Don't try to predicate loop preheaders.
if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
@@ -383,8 +390,14 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R))
continue;
- if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass)
- continue;
+ switch (MRI->getRegClass(R)->getID()) {
+ case Hexagon::PredRegsRegClassID:
+ case Hexagon::VecPredRegsRegClassID:
+ case Hexagon::VecPredRegs128BRegClassID:
+ break;
+ default:
+ continue;
+ }
for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
if (U->getParent()->isPHI())
return false;
@@ -442,24 +455,39 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
return true;
}
-unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
- assert(B->pred_size() <= 2);
+unsigned HexagonEarlyIfConversion::computePhiCost(const MachineBasicBlock *B,
+ const FlowPattern &FP) const {
if (B->pred_size() < 2)
return 0;
unsigned Cost = 0;
- MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
- for (I = B->begin(); I != E; ++I) {
- const MachineOperand &RO1 = I->getOperand(1);
- const MachineOperand &RO3 = I->getOperand(3);
- assert(RO1.isReg() && RO3.isReg());
+ for (const MachineInstr &MI : *B) {
+ if (!MI.isPHI())
+ break;
+ // If both incoming blocks are one of the TrueB/FalseB/SplitB, then
+ // a MUX may be needed. Otherwise the PHI will need to be updated at
+ // no extra cost.
+ // Find the interesting PHI operands for further checks.
+ SmallVector<unsigned,2> Inc;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ const MachineBasicBlock *BB = MI.getOperand(i+1).getMBB();
+ if (BB == FP.SplitB || BB == FP.TrueB || BB == FP.FalseB)
+ Inc.push_back(i);
+ }
+ assert(Inc.size() <= 2);
+ if (Inc.size() < 2)
+ continue;
+
+ const MachineOperand &RA = MI.getOperand(1);
+ const MachineOperand &RB = MI.getOperand(3);
+ assert(RA.isReg() && RB.isReg());
// Must have a MUX if the phi uses a subregister.
- if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+ if (RA.getSubReg() != 0 || RB.getSubReg() != 0) {
Cost++;
continue;
}
- MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
- MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+ const MachineInstr *Def1 = MRI->getVRegDef(RA.getReg());
+ const MachineInstr *Def3 = MRI->getVRegDef(RB.getReg());
if (!HII->isPredicable(*Def1) || !HII->isPredicable(*Def3))
Cost++;
}
@@ -485,7 +513,6 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
if (FP.TrueB && FP.FalseB) {
-
// Do not if-convert if the branch is one-sided.
if (MBPI) {
BranchProbability Prob(9, 10);
@@ -510,18 +537,16 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
// the code size. If the predicated blocks are smaller than a packet size,
// approximate the spare room in the packet that could be filled with the
// predicated/speculated instructions.
- unsigned TS = 0, FS = 0, Spare = 0;
- if (FP.TrueB) {
- TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
- if (TS < HEXAGON_PACKET_SIZE)
- Spare += HEXAGON_PACKET_SIZE-TS;
- }
- if (FP.FalseB) {
- FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
- if (FS < HEXAGON_PACKET_SIZE)
- Spare += HEXAGON_PACKET_SIZE-TS;
- }
- unsigned TotalIn = TS+FS;
+ auto TotalCount = [] (const MachineBasicBlock *B, unsigned &Spare) {
+ if (!B)
+ return 0u;
+ unsigned T = std::distance(B->begin(), B->getFirstTerminator());
+ if (T < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-T;
+ return T;
+ };
+ unsigned Spare = 0;
+ unsigned TotalIn = TotalCount(FP.TrueB, Spare) + TotalCount(FP.FalseB, Spare);
DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
<< TotalIn << ", spare room: " << Spare << "\n");
if (TotalIn >= SizeLimit+Spare)
@@ -536,17 +561,17 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
unsigned TotalPh = 0;
unsigned PredDefs = countPredicateDefs(FP.SplitB);
if (FP.JoinB) {
- TotalPh = computePhiCost(FP.JoinB);
+ TotalPh = computePhiCost(FP.JoinB, FP);
PredDefs += countPredicateDefs(FP.JoinB);
} else {
if (FP.TrueB && FP.TrueB->succ_size() > 0) {
MachineBasicBlock *SB = *FP.TrueB->succ_begin();
- TotalPh += computePhiCost(SB);
+ TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
}
if (FP.FalseB && FP.FalseB->succ_size() > 0) {
MachineBasicBlock *SB = *FP.FalseB->succ_begin();
- TotalPh += computePhiCost(SB);
+ TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
}
}
@@ -680,12 +705,12 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, HII->get(COpc));
MachineInstr::mop_iterator MOI = MI->operands_begin();
if (HII->isPostIncrement(*MI)) {
- MIB.addOperand(*MOI);
+ MIB.add(*MOI);
++MOI;
}
MIB.addReg(PredR);
for (const MachineOperand &MO : make_range(MOI, MI->operands_end()))
- MIB.addOperand(MO);
+ MIB.add(MO);
// Set memory references.
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -733,6 +758,43 @@ void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
}
}
+unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B,
+ MachineBasicBlock::iterator At, const TargetRegisterClass *DRC,
+ unsigned PredR, unsigned TR, unsigned TSR, unsigned FR, unsigned FSR) {
+ unsigned Opc = 0;
+ switch (DRC->getID()) {
+ case Hexagon::IntRegsRegClassID:
+ Opc = Hexagon::C2_mux;
+ break;
+ case Hexagon::DoubleRegsRegClassID:
+ Opc = Hexagon::PS_pselect;
+ break;
+ case Hexagon::VectorRegsRegClassID:
+ Opc = Hexagon::PS_vselect;
+ break;
+ case Hexagon::VecDblRegsRegClassID:
+ Opc = Hexagon::PS_wselect;
+ break;
+ case Hexagon::VectorRegs128BRegClassID:
+ Opc = Hexagon::PS_vselect_128B;
+ break;
+ case Hexagon::VecDblRegs128BRegClassID:
+ Opc = Hexagon::PS_wselect_128B;
+ break;
+ default:
+ llvm_unreachable("unexpected register type");
+ }
+ const MCInstrDesc &D = HII->get(Opc);
+
+ DebugLoc DL = B->findBranchDebugLoc();
+ unsigned MuxR = MRI->createVirtualRegister(DRC);
+ BuildMI(*B, At, DL, D, MuxR)
+ .addReg(PredR)
+ .addReg(TR, 0, TSR)
+ .addReg(FR, 0, FSR);
+ return MuxR;
+}
+
void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
const FlowPattern &FP) {
// Visit all PHI nodes in the WhereB block and generate MUX instructions
@@ -759,40 +821,25 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
TR = SR, TSR = SSR;
else if (FR == 0)
FR = SR, FSR = SSR;
- assert(TR && FR);
-
- using namespace Hexagon;
-
- unsigned DR = PN->getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI->getRegClass(DR);
- unsigned Opc = 0;
- if (RC == &IntRegsRegClass)
- Opc = C2_mux;
- else if (RC == &DoubleRegsRegClass)
- Opc = PS_pselect;
- else if (RC == &VectorRegsRegClass)
- Opc = PS_vselect;
- else if (RC == &VecDblRegsRegClass)
- Opc = PS_wselect;
- else if (RC == &VectorRegs128BRegClass)
- Opc = PS_vselect_128B;
- else if (RC == &VecDblRegs128BRegClass)
- Opc = PS_wselect_128B;
- else
- llvm_unreachable("unexpected register type");
- const MCInstrDesc &D = HII->get(Opc);
-
- MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
- DebugLoc DL;
- if (MuxAt != FP.SplitB->end())
- DL = MuxAt->getDebugLoc();
- unsigned MuxR = MRI->createVirtualRegister(RC);
- BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR)
- .addReg(FP.PredR)
- .addReg(TR, 0, TSR)
- .addReg(FR, 0, FSR);
-
- PN->addOperand(MachineOperand::CreateReg(MuxR, false));
+
+ assert(TR || FR);
+ unsigned MuxR = 0, MuxSR = 0;
+
+ if (TR && FR) {
+ unsigned DR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ MuxR = buildMux(FP.SplitB, FP.SplitB->getFirstTerminator(), RC,
+ FP.PredR, TR, TSR, FR, FSR);
+ } else if (TR) {
+ MuxR = TR;
+ MuxSR = TSR;
+ } else {
+ MuxR = FR;
+ MuxSR = FSR;
+ }
+
+ PN->addOperand(MachineOperand::CreateReg(MuxR, false, false, false, false,
+ false, false, MuxSR));
PN->addOperand(MachineOperand::CreateMBB(FP.SplitB));
}
}
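For reference, the reworked computePhiCost only charges for a mux when a PHI actually merges values coming out of the if-converted region (SplitB, TrueB or FalseB). Below is a hedged, self-contained sketch of that decision with simplified stand-in types; none of these names are the pass's own data structures.

#include <vector>

struct Block;                              // stand-in for MachineBasicBlock

struct PhiIncoming {                       // one (value, predecessor) pair
  unsigned Reg;
  const Block *Pred;
};

struct FlowPatternSketch {                 // stand-in for the pass's FlowPattern
  const Block *SplitB, *TrueB, *FalseB;
};

// A PHI in the join block costs a mux only when at least two of its
// incoming values arrive from the blocks being if-converted; otherwise
// the PHI is simply rewritten to use the surviving predecessor.
bool phiNeedsMux(const std::vector<PhiIncoming> &Incoming,
                 const FlowPatternSketch &FP) {
  unsigned FromRegion = 0;
  for (const PhiIncoming &In : Incoming)
    if (In.Pred == FP.SplitB || In.Pred == FP.TrueB || In.Pred == FP.FalseB)
      ++FromRegion;
  return FromRegion >= 2;
}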
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 8f070d842b8c..d8ba5dcd35ad 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -362,14 +362,16 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (Range.empty())
return;
- auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> bool {
+ // Return two booleans: { def-modifies-reg, def-covers-reg }.
+ auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> {
if (!Op.isReg() || !Op.isDef())
- return false;
+ return { false, false };
unsigned DR = Op.getReg(), DSR = Op.getSubReg();
if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg)
- return false;
+ return { false, false };
LaneBitmask SLM = getLaneMask(DR, DSR);
- return (SLM & LM).any();
+ LaneBitmask A = SLM & LM;
+ return { A.any(), A == SLM };
};
// The splitting step will create pairs of predicated definitions without
@@ -453,20 +455,27 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
// Remove <dead> flags from all defs that are not dead after live range
// extension, and collect all def operands. They will be used to generate
// the necessary implicit uses.
+ // At the same time, add <dead> flag to all defs that are actually dead.
+ // This can happen, for example, when a mux with identical inputs is
+ // replaced with a COPY: the use of the predicate register disappears and
+ // its def can become dead.
std::set<RegisterRef> DefRegs;
for (auto &Seg : Range) {
if (!Seg.start.isRegister())
continue;
MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
for (auto &Op : DefI->operands()) {
- if (Seg.start.isDead() || !IsRegDef(Op))
- continue;
- DefRegs.insert(Op);
- Op.setIsDead(false);
+ auto P = IsRegDef(Op);
+ if (P.second && Seg.end.isDead()) {
+ Op.setIsDead(true);
+ } else if (P.first) {
+ DefRegs.insert(Op);
+ Op.setIsDead(false);
+ }
}
}
- // Finally, add implicit uses to each predicated def that is reached
+ // Now, add implicit uses to each predicated def that is reached
// by other defs.
for (auto &Seg : Range) {
if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot()))
@@ -486,6 +495,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
for (RegisterRef R : ImpUses)
MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
}
+
}
void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
@@ -595,9 +605,9 @@ MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
.addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg());
} else {
MIB = BuildMI(B, At, DL, HII->get(Opc))
- .addReg(DstR, DstState, DstSR)
- .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
- .addOperand(SrcOp);
+ .addReg(DstR, DstState, DstSR)
+ .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
+ .add(SrcOp);
}
DEBUG(dbgs() << "created an initial copy: " << *MIB);
@@ -622,6 +632,12 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
bool ReadUndef = MD.isUndef();
MachineBasicBlock::iterator At = MI;
+ auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void {
+ for (auto &Op : MI.operands())
+ if (Op.isReg())
+ UpdRegs.insert(Op.getReg());
+ };
+
// If this is a mux of the same register, just replace it with COPY.
// Ideally, this would happen earlier, so that register coalescing would
// see it.
@@ -630,6 +646,8 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
if (ST.isReg() && SF.isReg()) {
RegisterRef RT(ST);
if (RT == RegisterRef(SF)) {
+ // Copy regs to update first.
+ updateRegs(MI);
MI.setDesc(HII->get(TargetOpcode::COPY));
unsigned S = getRegState(ST);
while (MI.getNumOperands() > 1)
@@ -651,9 +669,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
LIS->InsertMachineInstrInMaps(*TfrF);
// Will need to recalculate live intervals for all registers in MI.
- for (auto &Op : MI.operands())
- if (Op.isReg())
- UpdRegs.insert(Op.getReg());
+ updateRegs(MI);
removeInstr(MI);
return true;
@@ -828,7 +844,7 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
while (Ox < NP) {
MachineOperand &MO = MI.getOperand(Ox);
if (!MO.isReg() || !MO.isImplicit())
- MB.addOperand(MO);
+ MB.add(MO);
Ox++;
}
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index dfd1f1d4f886..015d3b840e6f 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -190,5 +190,5 @@ void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF,
MIB = BuildMI(*MBB, MII, DL, TII->get(newOp));
for (unsigned i = 0; i < MII->getNumOperands(); ++i)
- MIB.addOperand(MII->getOperand(i));
+ MIB.add(MII->getOperand(i));
}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index a3f6273f9f67..0e2380f4316a 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -301,16 +301,30 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
// the frame creation/destruction instructions.
if (MO.isFI())
return true;
- if (!MO.isReg())
- continue;
- unsigned R = MO.getReg();
- // Virtual registers will need scavenging, which then may require
- // a stack slot.
- if (TargetRegisterInfo::isVirtualRegister(R))
- return true;
- for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
- if (CSR[*S])
+ if (MO.isReg()) {
+ unsigned R = MO.getReg();
+ // Virtual registers will need scavenging, which then may require
+ // a stack slot.
+ if (TargetRegisterInfo::isVirtualRegister(R))
return true;
+ for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
+ if (CSR[*S])
+ return true;
+ continue;
+ }
+ if (MO.isRegMask()) {
+ // A regmask would normally have all callee-saved registers marked
+ // as preserved, so this check would not be needed, but in case
+ // other regmasks (for other calling conventions) ever appear,
+ // make sure they are processed correctly.
+ const uint32_t *BM = MO.getRegMask();
+ for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
+ unsigned R = x;
+ // If this regmask does not preserve a CSR, a frame will be needed.
+ if (!(BM[R/32] & (1u << (R%32))))
+ return true;
+ }
+ }
}
}
return false;
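The regmask test added above indexes the mask as consecutive 32-bit words, one bit per physical register: register R is preserved when bit R % 32 of word R / 32 is set. A small worked example of the same indexing follows, using a made-up register number rather than a real Hexagon one.

#include <cassert>
#include <cstdint>

// One bit per physical register, packed LSB-first into 32-bit words:
// register R is preserved when bit R % 32 of word R / 32 is set.
bool isPreservedInRegMask(const uint32_t *Mask, unsigned R) {
  return (Mask[R / 32] & (1u << (R % 32))) != 0;
}

int main() {
  uint32_t Mask[2] = {0, 0};
  Mask[1] |= 1u << 3;                       // mark hypothetical register 35
  assert(isPreservedInRegMask(Mask, 35));   // 35/32 == 1, 35%32 == 3
  assert(!isPreservedInRegMask(Mask, 4));   // an unpreserved CSR -> frame needed
  return 0;
}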
@@ -1473,8 +1487,7 @@ bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
return false;
unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR)
- .addOperand(MI->getOperand(1));
+ BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
.addReg(TmpR, RegState::Kill);
@@ -1646,8 +1659,15 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
LivePhysRegs LPR(&HRI);
LPR.addLiveIns(B);
SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
- for (auto R = B.begin(); R != It; ++R)
+ for (auto R = B.begin(); R != It; ++R) {
+ Clobbers.clear();
LPR.stepForward(*R, Clobbers);
+ // Dead defs are recorded in Clobbers, but are not automatically removed
+ // from the live set.
+ for (auto &C : Clobbers)
+ if (C.second->isReg() && C.second->isDead())
+ LPR.removeReg(C.first);
+ }
DebugLoc DL = MI->getDebugLoc();
unsigned SrcR = MI->getOperand(2).getReg();
@@ -1985,9 +2005,9 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
// class HaveRC and a new class NewRC. Return nullptr if a common class
// cannot be found, otherwise return the resulting class. If HaveRC is
// nullptr, assume that it is still unset.
- auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC,
- const TargetRegisterClass *NewRC)
- -> const TargetRegisterClass* {
+ auto getCommonRC =
+ [](const TargetRegisterClass *HaveRC,
+ const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
if (HaveRC == nullptr || HaveRC == NewRC)
return NewRC;
// Different classes, both non-null. Pick the more general one.
@@ -2221,7 +2241,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
const DebugLoc &DL = SI.getDebugLoc();
CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
- .addOperand(SrcOp);
+ .add(SrcOp);
}
++StartIt;
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
index bb5e379ce014..c99ad5130aef 100644
--- a/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -197,13 +197,13 @@ bool HexagonGenExtract::convert(Instruction *In) {
// It is still ok to generate extract, but only if the mask eliminates
// those bits (i.e. M does not have any bits set beyond U).
APInt C = APInt::getHighBitsSet(BW, BW-U);
- if (M.intersects(C) || !APIntOps::isMask(W, M))
+ if (M.intersects(C) || !M.isMask(W))
return false;
} else {
// Check if M starts with a contiguous sequence of W times 1 bits. Get
// the low U bits of M (which eliminates the 0 bits shifted in on the
// left), and check if the result is APInt's "mask":
- if (!APIntOps::isMask(W, M.getLoBits(U)))
+ if (!M.getLoBits(U).isMask(W))
return false;
}
@@ -221,11 +221,8 @@ bool HexagonGenExtract::convert(Instruction *In) {
bool HexagonGenExtract::visitBlock(BasicBlock *B) {
// Depth-first, bottom-up traversal.
- DomTreeNode *DTN = DT->getNode(B);
- typedef GraphTraits<DomTreeNode*> GTN;
- typedef GTN::ChildIteratorType Iter;
- for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
- visitBlock((*I)->getBlock());
+ for (auto *DTN : children<DomTreeNode*>(DT->getNode(B)))
+ visitBlock(DTN->getBlock());
// Allow limiting the number of generated extracts for debugging purposes.
bool HasCutoff = ExtractCutoff.getPosition();
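The rewritten check in convert() depends on one property of the AND mask: it must be a run of exactly W contiguous one bits starting at bit 0 (after dropping the shifted-in bits in the second case). The plain-integer sketch below illustrates that property; it stands in for the APInt::isMask(W) call and assumes widths below 64.

#include <cassert>
#include <cstdint>

// True when V is exactly W consecutive one bits at the bottom, i.e.
// V == 2^W - 1 -- the shape an "extract W bits" AND mask must have.
bool isLowMaskOfWidth(uint64_t V, unsigned W) {
  if (W == 0 || W >= 64)
    return false;                           // widths of interest are 1..63
  return V == ((UINT64_C(1) << W) - 1);
}

int main() {
  assert(isLowMaskOfWidth(0xFF, 8));        // plausible 8-bit extract mask
  assert(!isLowMaskOfWidth(0xFE, 8));       // hole at bit 0
  assert(!isLowMaskOfWidth(0x1FF, 8));      // nine bits, not eight
  return 0;
}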
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index 5a8e392d1275..54d99d399f88 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -947,11 +947,8 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
BlockDefs.insert(InsDefs);
}
- MachineDomTreeNode *N = MDT->getNode(B);
- typedef GraphTraits<MachineDomTreeNode*> GTN;
- typedef GTN::ChildIteratorType ChildIter;
- for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
- MachineBasicBlock *SB = (*I)->getBlock();
+ for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
+ MachineBasicBlock *SB = DTN->getBlock();
collectInBlock(SB, AVs);
}
@@ -1422,9 +1419,9 @@ bool HexagonGenInsert::generateInserts() {
bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
bool Changed = false;
- typedef GraphTraits<MachineDomTreeNode*> GTN;
- for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
- Changed |= removeDeadCode(*I);
+
+ for (auto *DTN : children<MachineDomTreeNode*>(N))
+ Changed |= removeDeadCode(DTN);
MachineBasicBlock *B = N->getBlock();
std::vector<MachineInstr*> Instrs;
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index a718df9c70ab..85222944c77c 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -324,9 +324,9 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
if (!MxOpc)
continue;
BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
- .addReg(MX.PredR)
- .addOperand(*MX.SrcT)
- .addOperand(*MX.SrcF);
+ .addReg(MX.PredR)
+ .add(*MX.SrcT)
+ .add(*MX.SrcF);
B.erase(MX.Def1);
B.erase(MX.Def2);
Changed = true;
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index e477dcc0f64a..86a8089401c2 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -100,6 +100,7 @@ namespace {
MachineRegisterInfo *MRI;
MachineDominatorTree *MDT;
const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
#ifndef NDEBUG
static int Counter;
#endif
@@ -381,7 +382,9 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
MRI = &MF.getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
- TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
+ TII = HST.getInstrInfo();
+ TRI = HST.getRegisterInfo();
for (auto &L : *MLI)
if (!L->getParentLoop()) {
@@ -960,24 +963,21 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
/// \brief Return true if the operation is invalid within hardware loop.
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
bool IsInnerHWLoop) const {
-
// Call is not allowed because the callee may use a hardware loop except for
// the case when the call never returns.
if (MI->getDesc().isCall())
return !TII->doesNotReturn(*MI);
// Check if the instruction defines a hardware loop register.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned R = MO.getReg();
- if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
- R == Hexagon::LC1 || R == Hexagon::SA1))
- return true;
- if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
+ using namespace Hexagon;
+ static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
+ static const unsigned Regs1[] = { LC1, SA1 };
+ auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
+ : makeArrayRef(Regs1, array_lengthof(Regs1));
+ for (unsigned R : CheckRegs)
+ if (MI->modifiesRegister(R, TRI))
return true;
- }
+
return false;
}
@@ -1511,7 +1511,7 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
int64_t V1, V2;
if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
return false;
- TV = V2 | (V1 << 32);
+ TV = V2 | (static_cast<uint64_t>(V1) << 32);
break;
}
case TargetOpcode::REG_SEQUENCE: {
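The checkForImmediate change above casts the high half to uint64_t before shifting because a signed 64-bit left shift by 32 overflows once bit 31 of V1 is set. A minimal demonstration of the well-defined form, using hypothetical names rather than the pass's variables:

#include <cassert>
#include <cstdint>

// Combine two 32-bit halves into one 64-bit value. Going through
// uint64_t keeps the shift well defined even when the high half has
// bit 31 set, where a signed 64-bit shift by 32 would overflow.
uint64_t combineHalves(uint32_t Hi, uint32_t Lo) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  assert(combineHalves(1, 2) == UINT64_C(0x100000002));
  assert(combineHalves(0x80000000u, 0) == UINT64_C(0x8000000000000000));
  return 0;
}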
diff --git a/lib/Target/Hexagon/HexagonIICHVX.td b/lib/Target/Hexagon/HexagonIICHVX.td
new file mode 100644
index 000000000000..4081a225832b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIICHVX.td
@@ -0,0 +1,102 @@
+//===--- HexagonIICHVX.td -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Though all these itinerary classes exist for V60 onwards, they are being
+// listed here as 'HVXV62Itin' because itinerary class description prior to V62
+// doesn't include operand cycle info. In future, I plan to merge them
+// together and call it 'HVXItin'.
+//
+class HVXV62Itin {
+ list<InstrItinData> HVXV62Itin_list = [
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23,
+ [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23,
+ [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<CVI_VA, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE,CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VA_DV, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF, CVI_MPY01]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV_SLOT2, [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV_SLOT2_LONG_EARLY,
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS_EARLY, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS_LONG_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_DV, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VS, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VINLANESAT, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_LD, [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_TMP_LD, [InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>],[1, 1, 1, 1, 10]>,
+ InstrItinData<CVI_VM_CUR_LD, [InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_VP_LDU, [InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_ST, [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_NEW_ST, [InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_STU, [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_HIST, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [1, 1, 1, 1]>];
+}
diff --git a/lib/Target/Hexagon/HexagonIICScalar.td b/lib/Target/Hexagon/HexagonIICScalar.td
new file mode 100644
index 000000000000..e69cfbdad688
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIICScalar.td
@@ -0,0 +1,164 @@
+//===--- HexagonIICScalar.td ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// These itinerary class descriptions are based on the instruction timing
+// classes as per V62. Currently, they are just extracted from
+// HexagonScheduleV62.td but will soon be auto-generated by HexagonGen.py.
+
+class ScalarItin {
+ list<InstrItinData> ScalarItin_list = [
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<1, [SLOT3]>], [2, 1, 1]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<1, [SLOT3]>], [2, 1, 1]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<1, [SLOT3]>], [3, 1, 1]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1, 1]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<1, [SLOT2]>], [2, 1, 1]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<1, [SLOT2]>], [3, 1, 1]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1,
+ [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1, 1]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<1, [SLOT0]>], [4, 1]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [3, 1]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<M_tc_3x_acc_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [4, 1, 1]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [4, 1, 1]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<1, [SLOT0]>], [3, 1, 1]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [3, 1, 1]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60.
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [4, 1, 1]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<1, [SLOT0]>],
+ [3, 1, 1, 1]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1, 1]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<1, [SLOT_ENDLOOP]>],
+ [2]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>],
+ [1, 1, 1, 1]>,
+
+ // Duplex and Compound
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]>,
+ // Misc
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]>];
+}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index f6012d29d422..8e10c521a77d 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -123,6 +123,12 @@ private:
bool isAlignedMemNode(const MemSDNode *N) const;
bool isPositiveHalfWord(const SDNode *N) const;
+ // DAG preprocessing functions.
+ void ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes);
+ void ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes);
+ void ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes);
+ void ppHoistZextI1(std::vector<SDNode*> &&Nodes);
+
SmallDenseMap<SDNode *,int> RootWeights;
SmallDenseMap<SDNode *,int> RootHeights;
SmallDenseMap<const Value *,int> GAUsesInFunction;
@@ -932,55 +938,21 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) {
void HexagonDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return; // Already selected.
- }
+ if (N->isMachineOpcode())
+ return N->setNodeId(-1); // Already selected.
switch (N->getOpcode()) {
- case ISD::Constant:
- SelectConstant(N);
- return;
-
- case ISD::ConstantFP:
- SelectConstantFP(N);
- return;
-
- case ISD::FrameIndex:
- SelectFrameIndex(N);
- return;
-
- case ISD::BITCAST:
- SelectBitcast(N);
- return;
-
- case ISD::SHL:
- SelectSHL(N);
- return;
-
- case ISD::LOAD:
- SelectLoad(N);
- return;
-
- case ISD::STORE:
- SelectStore(N);
- return;
-
- case ISD::MUL:
- SelectMul(N);
- return;
-
- case ISD::ZERO_EXTEND:
- SelectZeroExtend(N);
- return;
-
- case ISD::INTRINSIC_W_CHAIN:
- SelectIntrinsicWChain(N);
- return;
-
- case ISD::INTRINSIC_WO_CHAIN:
- SelectIntrinsicWOChain(N);
- return;
+ case ISD::Constant: return SelectConstant(N);
+ case ISD::ConstantFP: return SelectConstantFP(N);
+ case ISD::FrameIndex: return SelectFrameIndex(N);
+ case ISD::BITCAST: return SelectBitcast(N);
+ case ISD::SHL: return SelectSHL(N);
+ case ISD::LOAD: return SelectLoad(N);
+ case ISD::STORE: return SelectStore(N);
+ case ISD::MUL: return SelectMul(N);
+ case ISD::ZERO_EXTEND: return SelectZeroExtend(N);
+ case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N);
+ case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N);
}
SelectCode(N);
@@ -1010,15 +982,52 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
}
-void HexagonDAGToDAGISel::PreprocessISelDAG() {
+static bool isMemOPCandidate(SDNode *I, SDNode *U) {
+ // I is an operand of U. Check if U is an arithmetic (binary) operation
+ // usable in a memop, where the other operand is a loaded value, and the
+ // result of U is stored in the same location.
+
+ if (!U->hasOneUse())
+ return false;
+ unsigned Opc = U->getOpcode();
+ switch (Opc) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::OR:
+ break;
+ default:
+ return false;
+ }
+
+ SDValue S0 = U->getOperand(0);
+ SDValue S1 = U->getOperand(1);
+ SDValue SY = (S0.getNode() == I) ? S1 : S0;
+
+ SDNode *UUse = *U->use_begin();
+ if (UUse->getNumValues() != 1)
+ return false;
+
+ // Check if one of the inputs to U is a load instruction and the output
+ // is used by a store instruction. If so and they also have the same
+  // base pointer, then don't preprocess this node sequence as it
+ // can be matched to a memop.
+ SDNode *SYNode = SY.getNode();
+ if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
+ SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
+ SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
+ if (LDBasePtr == STBasePtr)
+ return true;
+ }
+ return false;
+}
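For illustration only (not part of the patch), the source-level shape that isMemOPCandidate protects is a load, an ALU operation, and a store back to the same base pointer; Hexagon can match that to a single memop (the L4_*_memop* instructions), so later transforms should leave the sequence alone.

    // A memop candidate: the load, the add, and the store all use *p.
    void bump(int *p) { *p += 1; }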
+
+
+// Transform: (or (select c x 0) z) -> (select c (or x z) z)
+// (or (select c 0 y) z) -> (select c z (or y z))
+void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
SelectionDAG &DAG = *CurDAG;
- std::vector<SDNode*> Nodes;
- for (SDNode &Node : DAG.allnodes())
- Nodes.push_back(&Node);
- // Simplify: (or (select c x 0) z) -> (select c (or x z) z)
- // (or (select c 0 y) z) -> (select c z (or y z))
- // This may not be the right thing for all targets, so do it here.
for (auto I : Nodes) {
if (I->getOpcode() != ISD::OR)
continue;
@@ -1056,18 +1065,22 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
}
}
}
+}
+
+// Transform: (store ch val (add x (add (shl y c) e)))
+// to: (store ch val (add x (shl (add y d) c))),
+// where e = (shl d c) for some integer d.
+// The purpose of this is to enable generation of loads/stores with
+// shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
+// value c must be 0, 1 or 2.
+void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
+ SelectionDAG &DAG = *CurDAG;
- // Transform: (store ch addr (add x (add (shl y c) e)))
- // to: (store ch addr (add x (shl (add y d) c))),
- // where e = (shl d c) for some integer d.
- // The purpose of this is to enable generation of loads/stores with
- // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
- // value c must be 0, 1 or 2.
for (auto I : Nodes) {
if (I->getOpcode() != ISD::STORE)
continue;
- // I matched: (store ch addr Off)
+ // I matched: (store ch val Off)
SDValue Off = I->getOperand(2);
// Off needs to match: (add x (add (shl y c) (shl d c))))
if (Off.getOpcode() != ISD::ADD)
@@ -1109,15 +1122,192 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
ReplaceNode(T0.getNode(), NewShl.getNode());
}
+}
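A small worked check (illustrative only) of the algebra behind ppAddrReorderAddShl: when the extra offset e is itself a multiple of 1<<c, i.e. e = (shl d c), the two address forms are equal, and the second one fits the mem(x+y<<#c) addressing mode.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x1000, y = 7, d = 3;
      for (uint32_t c = 0; c <= 2; ++c) {   // the shift amounts the mode accepts
        uint32_t e = d << c;                // e must be (shl d c)
        assert(x + ((y << c) + e) == x + ((y + d) << c));
      }
      return 0;
    }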
+
+// Transform: (load ch (add x (and (srl y c) Mask)))
+// to: (load ch (add x (shl (srl y d) d-c)))
+// where
+// Mask = 00..0 111..1 0.0
+// | | +-- d-c 0s, and d-c is 0, 1 or 2.
+// | +-------- 1s
+// +-------------- at most c 0s
+// Motivating example:
+// DAG combiner optimizes (add x (shl (srl y 5) 2))
+// to (add x (and (srl y 3) 1FFFFFFC))
+// which results in a constant-extended and(##...,lsr). This transformation
+// undoes this simplification for cases where the shl can be folded into
+// an addressing mode.
+void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
+ SelectionDAG &DAG = *CurDAG;
+
+ for (SDNode *N : Nodes) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::LOAD && Opc != ISD::STORE)
+ continue;
+ SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
+ // Addr must match: (add x T0)
+ if (Addr.getOpcode() != ISD::ADD)
+ continue;
+ SDValue T0 = Addr.getOperand(1);
+ // T0 must match: (and T1 Mask)
+ if (T0.getOpcode() != ISD::AND)
+ continue;
+
+ // We have an AND.
+ //
+ // Check the first operand. It must be: (srl y c).
+ SDValue S = T0.getOperand(0);
+ if (S.getOpcode() != ISD::SRL)
+ continue;
+ ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
+ if (SN == nullptr)
+ continue;
+ if (SN->getAPIntValue().getBitWidth() != 32)
+ continue;
+ uint32_t CV = SN->getZExtValue();
+
+ // Check the second operand: the supposed mask.
+ ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
+ if (MN == nullptr)
+ continue;
+ if (MN->getAPIntValue().getBitWidth() != 32)
+ continue;
+ uint32_t Mask = MN->getZExtValue();
+ // Examine the mask.
+ uint32_t TZ = countTrailingZeros(Mask);
+ uint32_t M1 = countTrailingOnes(Mask >> TZ);
+ uint32_t LZ = countLeadingZeros(Mask);
+ // Trailing zeros + middle ones + leading zeros must equal the width.
+ if (TZ + M1 + LZ != 32)
+ continue;
+ // The number of trailing zeros will be encoded in the addressing mode.
+ if (TZ > 2)
+ continue;
+ // The number of leading zeros must be at most c.
+ if (LZ > CV)
+ continue;
+
+ // All looks good.
+ SDValue Y = S.getOperand(0);
+ EVT VT = Addr.getValueType();
+ SDLoc dl(S);
+ // TZ = D-C, so D = TZ+C.
+ SDValue D = DAG.getConstant(TZ+CV, dl, VT);
+ SDValue DC = DAG.getConstant(TZ, dl, VT);
+ SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
+ SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
+ ReplaceNode(T0.getNode(), NewShl.getNode());
+ }
+}
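A standalone sketch of the mask test above (assuming C++20 <bit>; not taken from the patch): the 32-bit AND mask must be leading zeros, then a run of ones, then at most two trailing zeros, with no more leading zeros than the original shift amount c.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    bool isFoldableMask(uint32_t Mask, uint32_t C) {
      if (Mask == 0)
        return false;
      unsigned TZ = std::countr_zero(Mask);       // encoded in the addressing mode
      unsigned M1 = std::countr_one(Mask >> TZ);  // the middle run of ones
      unsigned LZ = std::countl_zero(Mask);       // must not exceed c
      return TZ + M1 + LZ == 32 && TZ <= 2 && LZ <= C;
    }

    int main() {
      // The motivating example: (and (srl y 3) 0x1FFFFFFC) with c = 3 is
      // accepted (TZ = 2, 27 ones, LZ = 3) and is rewritten to (shl (srl y 5) 2).
      assert(isFoldableMask(0x1FFFFFFCu, 3));
      // A mask with a hole in the middle is rejected.
      assert(!isFoldableMask(0x00FF00F0u, 4));
      return 0;
    }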
+
+// Transform: (op ... (zext i1 c) ...) -> (select c (op ... 1 ...)
+//                                                   (op ... 0 ...))
+void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
+ SelectionDAG &DAG = *CurDAG;
+
+ for (SDNode *N : Nodes) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::ZERO_EXTEND)
+ continue;
+ SDValue OpI1 = N->getOperand(0);
+ EVT OpVT = OpI1.getValueType();
+ if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
+ continue;
+ for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *U = *I;
+ if (U->getNumValues() != 1)
+ continue;
+ EVT UVT = U->getValueType(0);
+ if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
+ continue;
+ if (isMemOPCandidate(N, U))
+ continue;
+
+ // Potentially simplifiable operation.
+ unsigned I1N = I.getOperandNo();
+ SmallVector<SDValue,2> Ops(U->getNumOperands());
+ for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
+ Ops[i] = U->getOperand(i);
+ EVT BVT = Ops[I1N].getValueType();
+
+ SDLoc dl(U);
+ SDValue C0 = DAG.getConstant(0, dl, BVT);
+ SDValue C1 = DAG.getConstant(1, dl, BVT);
+ SDValue If0, If1;
+
+ if (isa<MachineSDNode>(U)) {
+ unsigned UseOpc = U->getMachineOpcode();
+ Ops[I1N] = C0;
+ If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
+ Ops[I1N] = C1;
+ If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
+ } else {
+ unsigned UseOpc = U->getOpcode();
+ Ops[I1N] = C0;
+ If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
+ Ops[I1N] = C1;
+ If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
+ }
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
+ DAG.ReplaceAllUsesWith(U, Sel.getNode());
+ }
+ }
+}
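The identity behind ppHoistZextI1, checked exhaustively over a tiny range (illustrative only): an operation applied to zext(i1 c) equals the select of its two constant-folded variants, which is what exposes the predicate to Hexagon's conditional instructions.

    #include <cassert>

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (unsigned x = 0; x < 8; ++x)
          // (add x (zext c))  ==  (select c (add x 1) (add x 0))
          assert(x + (c ? 1u : 0u) == (c ? x + 1u : x + 0u));
      return 0;
    }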
+
+void HexagonDAGToDAGISel::PreprocessISelDAG() {
+ // Repack all nodes before calling each preprocessing function,
+ // because each of them can modify the set of nodes.
+ auto getNodes = [this] () -> std::vector<SDNode*> {
+ std::vector<SDNode*> T;
+ T.reserve(CurDAG->allnodes_size());
+ for (SDNode &N : CurDAG->allnodes())
+ T.push_back(&N);
+ return T;
+ };
+
+ // Transform: (or (select c x 0) z) -> (select c (or x z) z)
+ // (or (select c 0 y) z) -> (select c z (or y z))
+ ppSimplifyOrSelect0(getNodes());
+
+ // Transform: (store ch val (add x (add (shl y c) e)))
+ // to: (store ch val (add x (shl (add y d) c))),
+ // where e = (shl d c) for some integer d.
+ // The purpose of this is to enable generation of loads/stores with
+ // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
+ // value c must be 0, 1 or 2.
+ ppAddrReorderAddShl(getNodes());
+
+ // Transform: (load ch (add x (and (srl y c) Mask)))
+ // to: (load ch (add x (shl (srl y d) d-c)))
+ // where
+ // Mask = 00..0 111..1 0.0
+ // | | +-- d-c 0s, and d-c is 0, 1 or 2.
+ // | +-------- 1s
+ // +-------------- at most c 0s
+ // Motivating example:
+ // DAG combiner optimizes (add x (shl (srl y 5) 2))
+ // to (add x (and (srl y 3) 1FFFFFFC))
+ // which results in a constant-extended and(##...,lsr). This transformation
+ // undoes this simplification for cases where the shl can be folded into
+ // an addressing mode.
+ ppAddrRewriteAndSrl(getNodes());
+
+  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 1 ...)
+  //                                                   (op ... 0 ...))
+ ppHoistZextI1(getNodes());
+
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "Preprocessed (Hexagon) selection DAG:";
+ CurDAG->dump();
+ });
if (EnableAddressRebalancing) {
rebalanceAddressTrees();
- DEBUG(
- dbgs() << "************* SelectionDAG after preprocessing: ***********\n";
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "Address tree balanced selection DAG:";
CurDAG->dump();
- dbgs() << "************* End SelectionDAG after preprocessing ********\n";
- );
+ });
}
}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index e87e1e6a7e0f..418dd71aeb4b 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -256,7 +256,9 @@ static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}
- if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ if (LocVT == MVT::i1) {
+ LocVT = MVT::i32;
+ } else if (LocVT == MVT::i8 || LocVT == MVT::i16) {
LocVT = MVT::i32;
ValVT = MVT::i32;
if (ArgFlags.isSExt())
@@ -483,9 +485,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
}
}
- unsigned Offset = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
+ return true;
}
static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
@@ -498,9 +498,7 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
}
}
- unsigned Offset = State.AllocateStack(8, 8);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
+ return true;
}
static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
@@ -511,7 +509,6 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
bool UseHVX = HST.useHVXOps();
bool UseHVXDbl = HST.useHVXDblOps();
- unsigned OffSiz = 64;
if (LocVT == MVT::v16i32) {
if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
@@ -523,18 +520,14 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
- OffSiz = 128;
} else if (LocVT == MVT::v64i32) {
if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
- OffSiz = 256;
}
- unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
+ return true;
}
void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
@@ -590,6 +583,16 @@ static bool isHvxVectorType(MVT Ty) {
}
}
+bool
+HexagonTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
+}
+
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed to by a pointer passed by the caller.
@@ -644,11 +647,11 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
@@ -671,21 +674,24 @@ SDValue HexagonTargetLowering::LowerCallResult(
// predicate register as the call result.
auto &MRI = DAG.getMachineFunction().getRegInfo();
SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- MVT::i32, InFlag);
+ MVT::i32, Glue);
// FR0 = (Value, Chain, Glue)
unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
FR0.getValue(0), FR0.getValue(2));
// TPR = (Chain, Glue)
- RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1,
- TPR.getValue(1));
+ // Don't glue this CopyFromReg, because it copies from a virtual
+ // register. If it is glued to the call, InstrEmitter will add it
+ // as an implicit def to the call (EmitMachineNode).
+ RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
+ Glue = TPR.getValue(1);
} else {
RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag);
+ RVLocs[i].getValVT(), Glue);
+ Glue = RetVal.getValue(2);
}
InVals.push_back(RetVal.getValue(0));
Chain = RetVal.getValue(1);
- InFlag = RetVal.getValue(2);
}
return Chain;
@@ -840,16 +846,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+ SDValue Glue;
if (!IsTailCall) {
SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
Chain = DAG.getCALLSEQ_START(Chain, C, dl);
+ Glue = Chain.getValue(1);
}
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
// The Glue is necessary since all emitted instructions must be
// stuck together.
- SDValue Glue;
if (!IsTailCall) {
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
@@ -902,6 +909,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
}
+ const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (Glue.getNode())
Ops.push_back(Glue);
@@ -1054,6 +1065,18 @@ SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
+// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
+// is marked as having side-effects, while the register read on Hexagon does
+// not have any. TableGen refuses to accept the direct pattern from that node
+// to the A4_tfrcpp.
+SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDLoc dl(Op);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
+}
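Usage sketch (the front-end builtin is an assumption here, not part of the patch): Clang's __builtin_readcyclecounter() is what reaches the backend as ISD::READCYCLECOUNTER, so with this custom lowering the read selects to the A4_tfrcpp register-pair transfer instead of being rejected.

    #include <cstdint>

    uint64_t cycles() {
      // Lowered to HexagonISD::READCYCLE and then matched to A4_tfrcpp.
      return __builtin_readcyclecounter();
    }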
+
SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
@@ -1140,10 +1163,25 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
EVT RegVT = VA.getLocVT();
if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
RegVT == MVT::i32 || RegVT == MVT::f32) {
- unsigned VReg =
+ unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+ // Treat values of type MVT::i1 specially: they are passed in
+ // registers of type i32, but they need to remain as values of
+ // type i1 for consistency of the argument lowering.
+ if (VA.getValVT() == MVT::i1) {
+ // Generate a copy into a predicate register and use the value
+ // of the register as the "InVal".
+ unsigned PReg =
+ RegInfo.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ SDNode *T = DAG.getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
+ Copy.getValue(0));
+ Copy = DAG.getCopyToReg(Copy.getValue(1), dl, PReg, SDValue(T, 0));
+ Copy = DAG.getCopyFromReg(Copy, dl, PReg, MVT::i1);
+ }
+ InVals.push_back(Copy);
+ Chain = Copy.getValue(1);
} else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
@@ -1217,7 +1255,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
InVals.push_back(FIN);
} else {
InVals.push_back(
- DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo()));
+ DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
}
}
}
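A usage-level illustration (hypothetical functions, not from the patch) of the i1 cases the surrounding hunks handle: a bool argument or return value still travels in r0 across the call boundary, but it is now kept as an MVT::i1 value in the DAG and copied into a predicate register (C2_tfrrp) on the receiving side.

    bool isOdd(unsigned X) { return X & 1; }           // i1 return value

    unsigned choose(bool P, unsigned A, unsigned B) {  // i1 formal argument
      return P ? A : B;                                // feeds a predicate use
    }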
@@ -1272,17 +1310,6 @@ static bool isSExtFree(SDValue N) {
return false;
}
-SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
- SDValue InpVal = Op.getOperand(0);
- if (isa<ConstantSDNode>(InpVal)) {
- uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
- return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64);
- }
- SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
-}
-
SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -1571,9 +1598,10 @@ HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
- GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, unsigned ReturnReg,
+ GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) const {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
@@ -1585,23 +1613,21 @@ HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
// 2. Callee which in this case is the Global address value.
// 3. Registers live into the call. In this case it's R0, as we
// have just one argument to be passed.
- // 4. InFlag if there is any.
+ // 4. Glue.
// Note: The order is important.
- if (InFlag) {
- SDValue Ops[] = { Chain, TGA,
- DAG.getRegister(Hexagon::R0, PtrVT), *InFlag };
- Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
- } else {
- SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT)};
- Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
- }
+ const auto &HRI = *Subtarget.getRegisterInfo();
+ const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
+ DAG.getRegisterMask(Mask), Glue };
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
// Inform MFI that function has calls.
MFI.setAdjustsStack(true);
- SDValue Flag = Chain.getValue(1);
- return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+ Glue = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}
//
@@ -1694,7 +1720,7 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
InFlag = Chain.getValue(1);
- return GetDynamicTLSAddr(DAG, Chain, GA, &InFlag, PtrVT,
+ return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
Hexagon::R0, HexagonII::MO_GDPLT);
}
@@ -1821,6 +1847,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
@@ -1891,7 +1918,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i8, Promote);
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+ setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+ setOperationAction(ISD::BSWAP, MVT::i64, Legal);
// We custom lower i64 to i64 mul, so that it is not considered as a legal
// operation. There is a pattern that will match i64 mul and transform it
@@ -1901,7 +1933,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
for (unsigned IntExpOp :
{ ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
- ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+ ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
setOperationAction(IntExpOp, MVT::i32, Expand);
setOperationAction(IntExpOp, MVT::i64, Expand);
@@ -2268,7 +2300,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
- case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
@@ -2296,6 +2327,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::OP_END: break;
}
return nullptr;
@@ -2968,11 +3000,11 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
- case ISD::CTPOP: return LowerCTPOP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
}
}
@@ -3026,37 +3058,25 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
}
case 'q': // q0-q3
- switch (VT.SimpleTy) {
+ switch (VT.getSizeInBits()) {
default:
- llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
- case MVT::v1024i1:
- case MVT::v512i1:
- case MVT::v32i16:
- case MVT::v16i32:
- case MVT::v64i8:
- case MVT::v8i64:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled vector size");
+ case 512:
return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
+ case 1024:
+ return std::make_pair(0U, &Hexagon::VecPredRegs128BRegClass);
}
case 'v': // V0-V31
- switch (VT.SimpleTy) {
+ switch (VT.getSizeInBits()) {
default:
- llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
- case MVT::v16i32:
- case MVT::v32i16:
- case MVT::v64i8:
- case MVT::v8i64:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled vector size");
+ case 512:
return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
- case MVT::v32i32:
- case MVT::v64i16:
- case MVT::v16i64:
- case MVT::v128i8:
+ case 1024:
if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
- case MVT::v256i8:
- case MVT::v128i16:
- case MVT::v64i32:
- case MVT::v32i64:
+ case 2048:
return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index a8ed29e585d4..fb8f0ba6b057 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -50,7 +50,6 @@ namespace HexagonISD {
JT, // Jump table.
CP, // Constant pool.
- POPCOUNT,
COMBINE,
PACKHL,
VSPLATB,
@@ -86,6 +85,7 @@ namespace HexagonISD {
TC_RETURN,
EH_RETURN,
DCFETCH,
+ READCYCLE,
OP_END
};
@@ -146,6 +146,7 @@ namespace HexagonISD {
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue
@@ -163,7 +164,7 @@ namespace HexagonISD {
SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
- GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT,
+ GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT,
unsigned ReturnReg, unsigned char OperandFlags) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
@@ -179,12 +180,16 @@ namespace HexagonISD {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
diff --git a/lib/Target/Hexagon/HexagonInstrAlias.td b/lib/Target/Hexagon/HexagonInstrAlias.td
deleted file mode 100644
index 7283d94ee759..000000000000
--- a/lib/Target/Hexagon/HexagonInstrAlias.td
+++ /dev/null
@@ -1,652 +0,0 @@
-//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Hexagon Instruction Mappings
-//===----------------------------------------------------------------------===//
-
-
-def : InstAlias<"memb({GP}+#$addr) = $Nt.new",
- (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memh({GP}+#$addr) = $Nt.new",
- (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memw({GP}+#$addr) = $Nt.new",
- (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memb({GP}+#$addr) = $Nt",
- (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memh({GP}+#$addr) = $Nt",
- (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memh({GP}+#$addr) = $Nt.h",
- (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memw({GP}+#$addr) = $Nt",
- (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memd({GP}+#$addr) = $Nt",
- (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>;
-
-def : InstAlias<"$Nt = memb({GP}+#$addr)",
- (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>;
-def : InstAlias<"$Nt = memub({GP}+#$addr)",
- (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>;
-def : InstAlias<"$Nt = memh({GP}+#$addr)",
- (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>;
-def : InstAlias<"$Nt = memuh({GP}+#$addr)",
- (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>;
-def : InstAlias<"$Nt = memw({GP}+#$addr)",
- (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>;
-def : InstAlias<"$Nt = memd({GP}+#$addr)",
- (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>;
-
-// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt
-def : InstAlias<"memb($Rs) = $Rt",
- (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memh($Rs) = $Rt",
- (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memh($Rs) = $Rt.h",
- (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memw($Rs) = $Rt",
- (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memb($Rs) = $Rt.new",
- (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memh($Rs) = $Rt.new",
- (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memw($Rs) = $Rt.new",
- (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memb($Rs) = #$S8",
- (S4_storeirb_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
-
-def : InstAlias<"memh($Rs) = #$S8",
- (S4_storeirh_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
-
-def : InstAlias<"memw($Rs) = #$S8",
- (S4_storeiri_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
-
-def : InstAlias<"memd($Rs) = $Rtt",
- (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"memb($Rs) = setbit(#$U5)",
- (L4_ior_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memh($Rs) = setbit(#$U5)",
- (L4_ior_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memw($Rs) = setbit(#$U5)",
- (L4_ior_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memb($Rs) = clrbit(#$U5)",
- (L4_iand_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memh($Rs) = clrbit(#$U5)",
- (L4_iand_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memw($Rs) = clrbit(#$U5)",
- (L4_iand_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs)
-def : InstAlias<"$Rd = memb($Rs)",
- (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memub($Rs)",
- (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memh($Rs)",
- (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memuh($Rs)",
- (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memw($Rs)",
- (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memd($Rs)",
- (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memubh($Rs)",
- (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memubh($Rs)",
- (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = membh($Rs)",
- (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = membh($Rs)",
- (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memb_fifo($Rs)",
- (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memh_fifo($Rs)",
- (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X)
-// to: if ($Pt) $Rd = memXX($Rs)
-def : InstAlias<"if ($Pt) $Rd = memb($Rs)",
- (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memub($Rs)",
- (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memh($Rs)",
- (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memuh($Rs)",
- (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memw($Rs)",
- (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rdd = memd($Rs)",
- (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt
-// to: if ($Pt) memXX($Rs) = $Rt
-def : InstAlias<"if ($Pt) memb($Rs) = $Rt",
- (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = $Rt",
- (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h",
- (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memw($Rs) = $Rt",
- (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memd($Rs) = $Rtt",
- (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new",
- (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new",
- (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new",
- (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new",
- (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new",
- (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new",
- (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-
-// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X)
-// to: if (!$Pt) $Rd = memXX($Rs)
-def : InstAlias<"if (!$Pt) $Rd = memb($Rs)",
- (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memub($Rs)",
- (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memh($Rs)",
- (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)",
- (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memw($Rs)",
- (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)",
- (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt
-// to: if (!$Pt) memXX($Rs) = $Rt
-def : InstAlias<"if (!$Pt) memb($Rs) = $Rt",
- (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = $Rt",
- (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h",
- (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memw($Rs) = $Rt",
- (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt",
- (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new",
- (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new",
- (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new",
- (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new",
- (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new",
- (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new",
- (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memb($Rs) = #$S6",
- (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = #$S6",
- (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt) memw($Rs) = #$S6",
- (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6",
- (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6",
- (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6",
- (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt) memb($Rs) = #$S6",
- (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = #$S6",
- (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt) memw($Rs) = #$S6",
- (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6",
- (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6",
- (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6",
- (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -=
-// to: memXX($Rs) |= $Rt
-def : InstAlias<"memb($Rs) &= $Rt",
- (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) |= $Rt",
- (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) += $Rt",
- (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) -= $Rt",
- (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) += #$U5",
- (L4_iadd_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) -= #$U5",
- (L4_isub_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) &= $Rt",
- (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) |= $Rt",
- (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) += $Rt",
- (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) -= $Rt",
- (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) += #$U5",
- (L4_iadd_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) -= #$U5",
- (L4_isub_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) &= $Rt",
- (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) |= $Rt",
- (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) += $Rt",
- (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) -= $Rt",
- (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) += #$U5",
- (L4_iadd_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) -= #$U5",
- (L4_isub_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-//
-// Alias of: if ($Pv.new) memX($Rs) = $Rt
-// to: if (p3.new) memX(r17 + #0) = $Rt
-def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt",
- (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt",
- (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h",
- (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt",
- (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt",
- (S4_pstorerdtnew_io
- PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt",
- (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt",
- (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h",
- (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt",
- (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt",
- (S4_pstorerdfnew_io
- PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-//
-// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ...
-// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0)
-def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)",
- (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)",
- (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)",
- (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)",
- (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)",
- (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)",
- (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)",
- (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)",
- (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)",
- (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)",
- (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)",
- (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)",
- (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"dcfetch($Rs)",
- (Y2_dcfetchbo IntRegs:$Rs, 0), 0>;
-
-// Alias of some insn mappings, others must be handled by the parser
-def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
- (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
- (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-
-// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs)
-def : InstAlias<"$Rd = neg($Rs)",
- (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>;
-
-def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
-def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
-def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
-def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
-
-def : InstAlias<"$Pd = $Ps",
- (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>;
-
-def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)",
- (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>;
-
-def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)",
- (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"$Rd = mpyui($Rs,$Rt)",
- (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>;
-
-// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a)
-def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
- (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
- (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-
-// maps if (!Pu) jumpr Rs -> if (!Pu) jumpr:nt Rs
-def : InstAlias<"if (!$Pu) jumpr $Rs",
- (J2_jumprf PredRegs:$Pu, IntRegs:$Rs)>,
- Requires<[HasV60T]>;
-
-// maps if (Pu) jumpr Rs -> if (Pu) jumpr:nt Rs
-def : InstAlias<"if ($Pu) jumpr $Rs",
- (J2_jumprt PredRegs:$Pu, IntRegs:$Rs)>,
- Requires<[HasV60T]>;
-
-// maps if (!Pu) jump $r15_2 -> if (!Pu) jump:nt $r15_2
-def : InstAlias<"if (!$Pu) jump $r15_2",
- (J2_jumpf PredRegs:$Pu, brtarget:$r15_2)>,
- Requires<[HasV60T]>;
-
-// maps if (Pu) jump $r15_2 -> if (Pu) jump:nt $r15_2
-def : InstAlias<"if ($Pu) jump $r15_2",
- (J2_jumpt PredRegs:$Pu, brtarget:$r15_2)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($src) jump $r15_2",
- (J2_jumpt PredRegs:$src, brtarget:$r15_2), 0>;
-
-def : InstAlias<"if (!$src) jump $r15_2",
- (J2_jumpf PredRegs:$src, brtarget:$r15_2), 0>;
-
-def : InstAlias<"if ($src1) jumpr $src2",
- (J2_jumprt PredRegs:$src1, IntRegs:$src2), 0>;
-
-def : InstAlias<"if (!$src1) jumpr $src2",
- (J2_jumprf PredRegs:$src1, IntRegs:$src2), 0>;
-
-// maps Vdd = Vss to Vdd = V6_vassignp(Vss)
-def : InstAlias<"$Vdd = $Vss",
- (V6_vassignp VecDblRegs:$Vdd, VecDblRegs:$Vss)>,
- Requires<[HasV60T]>;
-
-// maps Vd = #0 to Vd = vxor(Vd, Vd)
-def : InstAlias<"$Vd = #0",
- (V6_vxor VectorRegs:$Vd, VectorRegs:$Vd, VectorRegs:$Vd)>,
- Requires<[HasV60T]>;
-
-// maps Vdd = #0 to Vdd = vsub(Vdd, Vdd)
-def : InstAlias<"$Vdd = #0",
- (V6_vsubw_dv VecDblRegs:$Vdd, VecDblRegs:$Vdd, VecDblRegs:$Vdd)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd = vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd = vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd = vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd &= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd &= vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd &= vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd |= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd |= vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd |= vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd ^= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd ^= vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd ^= vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd = vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd = vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd = vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqw VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd &= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd &= vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd &= vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqw_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd |= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd |= vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd |= vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqw_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd ^= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd ^= vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd ^= vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqw_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd = vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd = vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd = vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd &= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd &= vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd &= vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd |= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd |= vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd |= vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd ^= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd ^= vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd ^= vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Rd.w = vextract($Vu, $Rs)" -> "$Rd = vextract($Vu, $Rs)"
-def : InstAlias<"$Rd.w = vextract($Vu, $Rs)",
- (V6_extractw IntRegs:$Rd, VectorRegs:$Vu, IntRegs:$Rs)>,
- Requires<[HasV60T]>;
-
-// Mapping from vtrans2x2(Vy32,Vx32,Rt32) to vshuff(Vy32,Vx32,Rt32)
-def : InstAlias<"vtrans2x2($Vy, $Vx, $Rt)",
- (V6_vshuff VectorRegs:$Vy, VectorRegs:$Vx, IntRegs:$Rt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"$Vt=vmem($Rs)",
- (V6_vL32b_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"$Vt=vmem($Rs):nt",
- (V6_vL32b_nt_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs)=$Vt",
- (V6_vS32b_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs)=$Vt.new",
- (V6_vS32b_new_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs):nt=$Vt.new",
- (V6_vS32b_nt_new_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Qv) vmem($Rs)=$Vt",
- (V6_vS32b_qpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Qv) vmem($Rs)=$Vt",
- (V6_vS32b_nqpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Qv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_qpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Qv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_nqpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Pv) vmem($Rs)=$Vt",
- (V6_vS32b_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Pv) vmem($Rs)=$Vt",
- (V6_vS32b_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Pv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Pv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"$Vt=vmemu($Rs)",
- (V6_vL32Ub_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmemu($Rs)=$Vt",
- (V6_vS32Ub_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Pv) vmemu($Rs)=$Vt",
- (V6_vS32Ub_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Pv) vmemu($Rs)=$Vt",
- (V6_vS32Ub_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-
diff --git a/lib/Target/Hexagon/HexagonInstrEnc.td b/lib/Target/Hexagon/HexagonInstrEnc.td
deleted file mode 100644
index 280832fd167f..000000000000
--- a/lib/Target/Hexagon/HexagonInstrEnc.td
+++ /dev/null
@@ -1,1019 +0,0 @@
-class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { opc{14-4}, src2};
- let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst};
-}
-
-class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>;
-class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>;
-class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>;
-class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>;
-class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>;
-class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>;
-class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>;
-class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>;
-class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>;
-class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>;
-class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>;
-class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>;
-class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>;
-class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>;
-class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>;
-class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>;
-class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>;
-class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>;
-class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>;
-class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>;
-class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>;
-class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>;
-class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>;
-class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>;
-class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>;
-class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>;
-class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>;
-class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>;
-class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>;
-class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>;
-class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>;
-class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>;
-class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>;
-class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>;
-class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>;
-class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>;
-class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>;
-class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>;
-class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>;
-class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>;
-class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>;
-class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>;
-class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>;
-class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>;
-class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>;
-class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>;
-class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>;
-class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>;
-class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>;
-class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>;
-class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>;
-class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>;
-class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>;
-class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>;
-class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>;
-class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>;
-class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>;
-class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>;
-class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>;
-class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>;
-class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>;
-class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>;
-class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>;
-class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>;
-class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>;
-class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>;
-class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>;
-class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>;
-class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>;
-class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>;
-class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>;
-class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>;
-class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>;
-class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>;
-class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>;
-class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>;
-class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>;
-class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>;
-class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>;
-class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>;
-class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>;
-class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>;
-class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>;
-class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>;
-class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>;
-class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>;
-class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>;
-class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>;
-class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>;
-class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>;
-class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>;
-class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>;
-class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>;
-class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>;
-class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>;
-class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>;
-class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>;
-class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>;
-class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>;
-class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>;
-class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>;
-class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>;
-class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>;
-class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>;
-class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>;
-class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>;
-class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>;
-class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>;
-class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>;
-class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>;
-class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>;
-class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>;
-class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>;
-class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>;
-class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>;
-class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>;
-class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>;
-class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>;
-class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>;
-class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>;
-class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>;
-class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>;
-class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>;
-class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>;
-class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>;
-class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>;
-class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>;
-class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>;
-class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>;
-class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>;
-class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>;
-class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>;
-class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>;
-class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>;
-class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>;
-class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>;
-class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>;
-class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>;
-class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>;
-class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>;
-class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>;
-class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>;
-class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>;
-class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>;
-class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>;
-class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>;
-class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>;
-class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>;
-class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>;
-class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>;
-class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>;
-class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>;
-class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>;
-class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>;
-class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>;
-class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>;
-class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>;
-class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>;
-class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>;
-class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>;
-class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>;
-class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>;
-class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>;
-class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>;
-class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>;
-class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>;
-class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>;
-class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>;
-class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>;
-class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>;
-class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>;
-class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>;
-class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>;
-class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>;
-class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>;
-class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>;
-class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>;
-class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>;
-class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>;
-class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>;
-class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>;
-class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>;
-class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>;
-class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>;
-class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>;
-
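The Enc_COPROC_VX_3op_v class above splits a 15-bit opcode around three 5-bit register fields: Inst{31-16} takes opc{14-4} followed by src2, and Inst{13-0} takes opc{3}, src1, opc{2-0}, dst. A minimal C++ sketch of that packing (illustrative only, not part of this diff; the opcode value is the one used by V6_vaddw_enc above):

#include <cstdint>
#include <cstdio>

// Pack the fields exactly as the two 'let Inst{...}' assignments describe:
//   Inst{31-16} = { opc{14-4}, src2 }
//   Inst{13-0}  = { opc{3}, src1, opc{2-0}, dst }
static uint32_t encodeVX3op(uint32_t opc, uint32_t dst, uint32_t src1,
                            uint32_t src2) {
  uint32_t hi = (((opc >> 4) & 0x7FF) << 5) | (src2 & 0x1F);    // bits 31-16
  uint32_t lo = (((opc >> 3) & 0x1) << 13) | ((src1 & 0x1F) << 8) |
                ((opc & 0x7) << 5) | (dst & 0x1F);              // bits 13-0
  return (hi << 16) | lo;
}

int main() {
  // V6_vaddw_enc: opc = 0b000111000100000 (0xE20), with dst=3, src1=1, src2=2.
  std::printf("0x%08x\n", encodeVX3op(0xE20, 3, 1, 2));
}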
-class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon {
- bits<2> dst;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} };
- let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} };
-}
-
-class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>;
-class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>;
-class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>;
-class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>;
-class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>;
-class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>;
-class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>;
-class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>;
-class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>;
-class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>;
-class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>;
-class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>;
-class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>;
-class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>;
-class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>;
-class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>;
-class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>;
-class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>;
-class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>;
-class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>;
-class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>;
-class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>;
-class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>;
-class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>;
-class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>;
-class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>;
-class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>;
-class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>;
-class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>;
-class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>;
-class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>;
-class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>;
-class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>;
-class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>;
-class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>;
-class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>;
-class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>;
-class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>;
-
-class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> dst;
- bits<5> src2;
-
- let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} };
- let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>;
-class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>;
-class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>;
-class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>;
-class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>;
-class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>;
-class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>;
-class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>;
-class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>;
-class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>;
-class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>;
-class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>;
-
-class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
-
- let Inst{31-16} = { 0b00011110000000, opc{5-4} };
- let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>;
-class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>;
-class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>;
-class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>;
-class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>;
-class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>;
-class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>;
-class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>;
-class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>;
-class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>;
-class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>;
-class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>;
-class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>;
-class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>;
-class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>;
-class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>;
-class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>;
-class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>;
-class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>;
-class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>;
-class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>;
-class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>;
-class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>;
-class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>;
-class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>;
-
-class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<10> src2;
- bits<4> src2_vector;
-
- let src2_vector = src2{9-6};
- let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>;
-class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>;
-class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>;
-class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>;
-class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>;
-class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>;
-class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>;
-
-class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<11> src2;
- bits<4> src2_vector;
-
- let src2_vector = src2{10-7};
- let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>;
-class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>;
-class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>;
-class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>;
-class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>;
-class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>;
-class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>;
-
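Note that in the vL32b address-immediate classes above only src2{9-6} (64-byte mode) or src2{10-7} (128-byte mode) survives into the encoding, i.e. the immediate is scaled down by the vector length. A rough sketch of that scaling, assuming src2 is a vector-aligned byte offset (the classes themselves only show the bit selection, so treat this as an interpretation, not part of this diff):

#include <cstdint>
#include <cstdio>

// 4-bit signed vector index, mirroring 'let src2_vector = src2{9-6}' (64B)
// and 'let src2_vector = src2{10-7}' (128B) in the classes above.
static uint32_t vmemOffsetField(int32_t byteOff, int32_t vecLenBytes) {
  return (uint32_t)(byteOff / vecLenBytes) & 0xF;
}

int main() {
  std::printf("%u %u\n", vmemOffsetField(192, 64),    // +3 vectors -> 3
                         vmemOffsetField(-128, 128)); // -1 vector  -> 0xF
}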
-class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<4> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{9-6};
- let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<11> src2;
- bits<4> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{10-7};
- let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>;
-class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>;
-class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>;
-
-class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>;
-class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>;
-class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<4> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{9-6};
- let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>;
-class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<11> src2;
- bits<4> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{10-7};
- let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>;
-class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>;
-
-class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<4> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{9-6};
- let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<11> src3;
- bits<4> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{10-7};
- let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>;
-class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>;
-class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>;
-class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>;
-class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>;
-class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>;
-class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>;
-class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>;
-class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>;
-class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>;
-
-class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>;
-class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>;
-class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>;
-class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>;
-class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>;
-class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>;
-class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>;
-class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>;
-class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>;
-class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<4> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{9-6};
- let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>;
-class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>;
-class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>;
-class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<11> src3;
- bits<4> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{10-7};
- let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>;
-class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>;
-class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>;
-class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>;
-
-// TODO: Change script to generate dst, src1, src2 instead of
-// dst, dst2, src1.
-class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<9> src2;
- bits<3> src2_vector;
-
- let src2_vector = src2{8-6};
- let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>;
-class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>;
-class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>;
-class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>;
-class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>;
-class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>;
-class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>;
-
-class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<10> src2;
- bits<3> src2_vector;
-
- let src2_vector = src2{9-7};
- let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>;
-class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>;
-class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>;
-class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>;
-class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>;
-class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>;
-class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>;
-
-
-// TODO: Change script to generate src1, src2 and src3 instead of
-// dst, src1, src2.
-class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<9> src2;
- bits<3> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{8-6};
- let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
- let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>;
-class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>;
-class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>;
-
-class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<3> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{9-7};
- let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
- let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>;
-class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>;
-class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>;
-
-// TODO: Change script to generate src1, src2 and src3 instead of
-// dst, src1, src2.
-class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<9> src2;
- bits<3> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{8-6};
- let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>;
-class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<3> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{9-7};
- let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>;
-class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>;
-
-// TODO: Change script to generate src1, src2,src3 and src4 instead of
-// dst, src1, src2, src3.
-class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3;
- bits<3> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{8-6};
- let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>;
-class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>;
-class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>;
-class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>;
-class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>;
-class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>;
-class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>;
-class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>;
-class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>;
-class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>;
-
-// TODO: Change script to generate src1, src2,src3 and src4 instead of
-// dst, src1, src2, src3.
-class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<3> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{9-7};
- let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>;
-class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>;
-class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>;
-class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>;
-class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>;
-class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>;
-class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>;
-class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>;
-class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>;
-class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3;
- bits<3> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{8-6};
- let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>;
-class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>;
-class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>;
-class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<3> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{9-7};
- let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>;
-class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>;
-class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>;
-class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>;
-
-class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<1> src2;
-
- let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} };
- let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>;
-class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>;
-class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>;
-class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>;
-class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>;
-class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>;
-class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>;
-
-class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<1> src2;
- bits<5> src3;
-
- let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} };
- let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>;
-class V6_vS32Ub_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0111>;
-class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>;
-
-class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<1> src2;
- bits<3> src3;
-
- let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} };
-}
-
-class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>;
-class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>;
-
-class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<1> src3;
- bits<5> src4;
-
- let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>;
-class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>;
-class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>;
-class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>;
-class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>;
-class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>;
-class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>;
-class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>;
-class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>;
-class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<1> src3;
- bits<3> src4;
-
- let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>;
-class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>;
-class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>;
-class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>;
-
-
-class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<1> src3;
-
- let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} };
- let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} };
-}
-
-class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>;
-class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>;
-class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>;
-class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>;
-class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>;
-class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>;
-
-class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} };
- let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} };
-}
-
-class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>;
-class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>;
-
-class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<5> src2;
- bits<5> src3;
-
- let Inst{31-16} = { 0b00011001111, src3{4-0} };
- let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} };
-}
-
-class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>;
-class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>;
-
-
-class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> dst;
- bits<5> src2;
-
- let Inst{31-16} = { 0b0001101000, opc{0}, 0b00000 };
- let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} };
-}
-
-class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>;
-class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>;
-
-class Enc_X_p3op<bits<8> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> dst;
- bits<5> src2;
- bits<5> src3;
-
- let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} };
- let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} };
-}
-
-class V6_vnccombine_enc : Enc_X_p3op<0b00001000>;
-class V6_vccombine_enc : Enc_X_p3op<0b00001100>;
-
-class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<3> src3;
-
- let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} };
- let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>;
-class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>;
-class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>;
-class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>;
-class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>;
-class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>;
-class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>;
-class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>;
-class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>;
-class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>;
-class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>;
-class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>;
-class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>;
-class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>;
-class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>;
-
-class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<3> src3;
-
- let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} };
- let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} };
-}
-
-class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>;
-class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>;
-class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>;
-class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>;
-class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>;
-class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>;
-class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>;
-class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>;
-
-class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon {
- bits<2> dst;
- bits<2> src1;
- bits<2> src2;
-
- let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 };
- let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} };
-}
-
-class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>;
-class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>;
-class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>;
-class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>;
-class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>;
-
-class V6_pred_not_enc : OpcodeHexagon {
- bits<2> dst;
- bits<2> src1;
-
- let Inst{31-16} = { 0b0001111000000011 };
- let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} };
-}
-
-class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<5> src3;
-
- let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} };
- let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} };
-}
-
-class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>;
-class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>;
-
-class Enc_X_2op<bits<16> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
-
- let Inst{31-16} = { opc{15-5}, src1{4-0} };
- let Inst{13-0} = { opc{4-3}, 0b0000, opc{2-0}, dst{4-0} };
-}
-
-class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>;
-class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>;
-class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>;
-
-
-class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon {
- bits<2> dst;
- bits<5> src1;
-
- let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} };
- let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} };
-}
-
-class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>;
-class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>;
-
-class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<6> src2;
-
- let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} };
- let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} };
-}
-
-class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>;
-class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>;
-class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>;
-class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>;
-class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>;
-class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>;
-
-class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { opc{14-4}, src1{4-0} };
- let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>;
-class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>;
-class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>;
-class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>;
-class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>;
-class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>;
-class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>;
-class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>;
-
-class Enc_no_operands<bits<25> opc> : OpcodeHexagon {
-
- let Inst{31-16} = { opc{24-10}, 0 };
- let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 };
-}
-
-class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>;
-class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>;
-class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>;
-class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>;
-
-class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon {
- bits<5> src1;
-
- let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} };
- let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 };
-}
-
-class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>;
-class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>;
-
-class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon {
- bits<5> src1;
-
- let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 };
- let Inst{13-0} = { 0, src1{4-0}, 0b00000000 };
-}
-
-class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>;
-class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>;
-
-class A5_ACS_enc : OpcodeHexagon {
- bits<5> dst1;
- bits<2> dst2;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b11101010101, src1{4-0} };
- let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} };
-}
-
-class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<2> src3;
-
- let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} };
- let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} };
-}
-
-class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>;
-class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>;
-class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>;
-
-class V6_vhistq_enc : OpcodeHexagon {
- bits<2> src1;
-
- let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 };
- let Inst{13-0} = { 0b10000010000000 };
-}
-
-// TODO: Change script to generate dst1 instead of dst.
-class A6_vminub_RdP_enc : OpcodeHexagon {
- bits<5> dst1;
- bits<2> dst2;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b11101010111, src2{4-0} };
- let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} };
-}
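All of the deleted encoding classes populate the 32-bit Inst field inherited from OpcodeHexagon, and the literal prefixes they write (0b0001… for the VX arithmetic/compare forms, 0b0010… for the VMEM load/store forms) land in the ICLASS nibble Inst{31-28}. A trivial decode-side sketch (illustrative only; the example words are made up):

#include <cstdint>
#include <cstdio>

static unsigned iclass(uint32_t insn) { return insn >> 28; } // Inst{31-28}

int main() {
  std::printf("%u %u\n", iclass(0x1D201043u),  // a VX-style word   -> 1
                         iclass(0x29003022u)); // a VMEM-style word -> 2
}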
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index fa3cccbd0879..39c2a6e4f5a5 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -7,26 +7,6 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Hexagon Instruction Flags +
-//
-// *** Must match HexagonBaseInfo.h ***
-//===----------------------------------------------------------------------===//
-
-class IType<bits<5> t> {
- bits<5> Value = t;
-}
-def TypePSEUDO : IType<0>;
-def TypeALU32 : IType<1>;
-def TypeCR : IType<2>;
-def TypeJR : IType<3>;
-def TypeJ : IType<4>;
-def TypeLD : IType<5>;
-def TypeST : IType<6>;
-def TypeSYSTEM : IType<7>;
-def TypeXTYPE : IType<8>;
-def TypeENDLOOP: IType<31>;
-
// Maintain list of valid subtargets for each instruction.
class SubTarget<bits<6> value> {
bits<6> Value = value;
@@ -54,6 +34,7 @@ class MemAccessSize<bits<4> value> {
bits<4> Value = value;
}
+// MemAccessSize is represented as 1+log2(N) where N is the access size in bytes.
def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
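The MemAccessSize values in this file are consistent with reading N as the access size in bytes: 1 + log2(1) = 1 for a byte access up through 1 + log2(128) = 8 for a 128-byte vector access. A quick illustrative check:

#include <cmath>
#include <cstdio>

int main() {
  // 1 + log2(access size in bytes) reproduces the MemAccessSize values:
  const unsigned sizes[] = {1, 2, 4, 8, 64, 128};
  for (unsigned bytes : sizes)
    std::printf("%3u bytes -> MemAccessSize<%u>\n", bytes,
                1u + (unsigned)std::log2((double)bytes));
}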
@@ -70,10 +51,9 @@ def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv)
class OpcodeHexagon {
field bits<32> Inst = ?; // Default to an invalid insn.
bits<4> IClass = 0; // ICLASS
+ bits<1> zero = 0;
let Inst{31-28} = IClass;
-
- bits<1> zero = 0;
}
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
@@ -99,85 +79,88 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Instruction type according to the ISA.
IType Type = type;
- let TSFlags{4-0} = Type.Value;
+ let TSFlags{5-0} = Type.Value;
// Solo instructions, i.e., those that cannot be in a packet with others.
bits<1> isSolo = 0;
- let TSFlags{5} = isSolo;
+ let TSFlags{6} = isSolo;
// Packed only with A or X-type instructions.
bits<1> isSoloAX = 0;
- let TSFlags{6} = isSoloAX;
+ let TSFlags{7} = isSoloAX;
// Only A-type instruction in first slot or nothing.
bits<1> isSoloAin1 = 0;
- let TSFlags{7} = isSoloAin1;
+ let TSFlags{8} = isSoloAin1;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{8} = isPredicated;
+ let TSFlags{9} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{9} = isPredicatedFalse;
+ let TSFlags{10} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{10} = isPredicatedNew;
+ let TSFlags{11} = isPredicatedNew;
bits<1> isPredicateLate = 0;
- let TSFlags{11} = isPredicateLate; // Late predicate producer insn.
+ let TSFlags{12} = isPredicateLate; // Late predicate producer insn.
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{12} = isNewValue; // New-value consumer insn.
+ let TSFlags{13} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{13} = hasNewValue; // New-value producer insn.
+ let TSFlags{14} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{16-14} = opNewValue; // New-value produced operand.
+ let TSFlags{17-15} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{17} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{18} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{18} = isNVStore; // New-value store insn.
+ let TSFlags{19} = isNVStore; // New-value store insn.
bits<1> isCVLoadable = 0;
- let TSFlags{19} = isCVLoadable; // Load that can become cur-value load.
+ let TSFlags{20} = isCVLoadable; // Load that can become cur-value load.
bits<1> isCVLoad = 0;
- let TSFlags{20} = isCVLoad; // Cur-value load insn.
+ let TSFlags{21} = isCVLoad; // Cur-value load insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{21} = isExtendable; // Insn may be extended.
+ let TSFlags{22} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{22} = isExtended; // Insn must be extended.
+ let TSFlags{23} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{25-23} = opExtendable; // Which operand may be extended.
+ let TSFlags{26-24} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{26} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{27} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{31-27} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{32-28} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending.
// If an instruction is valid on a subtarget, set the corresponding
// bit from validSubTargets.
// By default, instruction is valid on all subtargets.
SubTarget validSubTargets = HasAnySubT;
- let TSFlags{39-34} = validSubTargets.Value;
+ let TSFlags{40-35} = validSubTargets.Value;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
- let TSFlags{42-40} = addrMode.Value;
+ let TSFlags{43-41} = addrMode.Value;
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{46-43} = accessSize.Value;
+ let TSFlags{47-44} = accessSize.Value;
bits<1> isTaken = 0;
- let TSFlags {47} = isTaken; // Branch prediction.
+ let TSFlags {48} = isTaken; // Branch prediction.
bits<1> isFP = 0;
- let TSFlags {48} = isFP; // Floating-point.
+ let TSFlags {49} = isFP; // Floating-point.
bits<1> hasNewValue2 = 0;
- let TSFlags{50} = hasNewValue2; // Second New-value producer insn.
+ let TSFlags{51} = hasNewValue2; // Second New-value producer insn.
bits<3> opNewValue2 = 0;
- let TSFlags{53-51} = opNewValue2; // Second New-value produced operand.
+ let TSFlags{54-52} = opNewValue2; // Second New-value produced operand.
bits<1> isAccumulator = 0;
- let TSFlags{54} = isAccumulator;
+ let TSFlags{55} = isAccumulator;
+
+ bits<1> prefersSlot3 = 0;
+ let TSFlags{56} = prefersSlot3; // Complex XU
bit cofMax1 = 0;
let TSFlags{60} = cofMax1;
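Because the Type field grew from TSFlags{4-0} to TSFlags{5-0}, every later field above shifts up by one bit, and anything that unpacks TSFlags has to move in lock-step. A minimal sketch of that unpacking with hypothetical constant names (the authoritative positions and masks live in MCTargetDesc/HexagonBaseInfo.h, as the comment below notes):

#include <cstdint>

// Hypothetical pos/mask constants mirroring the new TSFlags layout above.
enum : uint64_t {
  TypePos = 0,           TypeMask = 0x3F,  // 6 bits now, was 5
  SoloPos = 6,           SoloMask = 0x1,
  PredicatedPos = 9,     PredicatedMask = 0x1,
  AddrModePos = 41,      AddrModeMask = 0x7,
  MemAccessSizePos = 44, MemAccessSizeMask = 0xF,
};

static unsigned getType(uint64_t TSFlags) {
  return unsigned((TSFlags >> TypePos) & TypeMask);
}
static bool isPredicated(uint64_t TSFlags) {
  return (TSFlags >> PredicatedPos) & PredicatedMask;
}

int main() {
  uint64_t Flags = (5ULL << TypePos) | (1ULL << PredicatedPos);
  return (getType(Flags) == 5 && isPredicated(Flags)) ? 0 : 1;
}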
@@ -200,9 +183,13 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let NValueST = !if(isNVStore, "true", "false");
let isNT = !if(isNonTemporal, "true", "false");
+ let hasSideEffects = 0;
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
+class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> :
+ InstHexagon<outs, ins, asmstr, [], "", itin, type>;
+
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions +
//===----------------------------------------------------------------------===//
@@ -214,14 +201,13 @@ class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
-let mayLoad = 1 in
-class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : LDInst<outs, ins, asmstr, pattern, cstr>;
+class PseudoLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : LDInst<outs, ins, asmstr, pattern, cstr>;
+ : PseudoLDInst<outs, ins, asmstr, pattern, cstr>;
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
@@ -247,6 +233,11 @@ class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
+let mayStore = 1 in
+class STInst_NoOpcode<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+
class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
: STInst<outs, ins, asmstr, pattern, cstr>;
@@ -269,28 +260,24 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
: STInst<outs, ins, asmstr, pattern, cstr, itin>;
-// SYSTEM Instruction Class in V4 can take SLOT0 only
-// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
-class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>,
- OpcodeHexagon;
-
-// ALU32 Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
-class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon;
-
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU64>,
OpcodeHexagon;
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Inst_NoOpcode<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU64>;
+
+
class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
: ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
@@ -302,13 +289,13 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeM>,
OpcodeHexagon;
// Same as above but doesn't derive from OpcodeHexagon
class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeM>;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -324,12 +311,16 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeS_2op>,
OpcodeHexagon;
+class SInst_NoOpcode<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeS_2op>;
+
class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeS_2op>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -337,7 +328,9 @@ class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23>
- : SInst<outs, ins, asmstr, pattern, cstr, itin>;
+ : SInst<outs, ins, asmstr, pattern, cstr, itin> {
+ let Type = TypeS_3op;
+}
// J Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
@@ -349,12 +342,6 @@ class JInst_CJUMP_UCJUMP<dag outs, dag ins, string asmstr, list<dag> pattern = [
string cstr = "", InstrItinClass itin = J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon;
-// JR Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
-class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon;
-
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -383,26 +370,6 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
-
-//
-// ALU32 patterns
-//.
-class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
-class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
-class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
-class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
//
// ALU64 patterns.
//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 493d04703da9..1fdf930c62fd 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -11,18 +11,6 @@
//
//===----------------------------------------------------------------------===//
-//----------------------------------------------------------------------------//
-// Hexagon Instruction Flags
-//
-// *** Must match BaseInfo.h ***
-//----------------------------------------------------------------------------//
-
-def TypeV4LDST : IType<9>;
-def TypeNV : IType<10>;
-def TypeDUPLEX : IType<11>;
-def TypeCOMPOUND : IType<12>;
-def TypePREFIX : IType<30>;
-
// Duplex Instruction Class Declaration
//===----------------------------------------------------------------------===//
@@ -61,7 +49,7 @@ class InstDuplex<bits<4> iClass, list<dag> pattern = [],
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
- let TSFlags{4-0} = Type.Value;
+ let TSFlags{5-0} = Type.Value;
// Predicated instructions.
bits<1> isPredicated = 0;
@@ -107,7 +95,7 @@ class InstDuplex<bits<4> iClass, list<dag> pattern = [],
//
class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNCJ>, OpcodeHexagon;
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
@@ -141,7 +129,7 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
- TypePREFIX>, OpcodeHexagon;
+ TypeEXTENDER>, OpcodeHexagon;
class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
@@ -150,11 +138,11 @@ class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>,
OpcodeHexagon;
class CJInst_JMPSET<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCJ>,
OpcodeHexagon;
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index b9f4373a0b79..c8a7faea5ed5 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -12,28 +12,6 @@
//===----------------------------------------------------------------------===//
//----------------------------------------------------------------------------//
-// Hexagon Instruction Flags +
-//
-// *** Must match BaseInfo.h ***
-//----------------------------------------------------------------------------//
-
-def TypeCVI_VA : IType<13>;
-def TypeCVI_VA_DV : IType<14>;
-def TypeCVI_VX : IType<15>;
-def TypeCVI_VX_DV : IType<16>;
-def TypeCVI_VP : IType<17>;
-def TypeCVI_VP_VS : IType<18>;
-def TypeCVI_VS : IType<19>;
-def TypeCVI_VINLANESAT : IType<20>;
-def TypeCVI_VM_LD : IType<21>;
-def TypeCVI_VM_TMP_LD : IType<22>;
-def TypeCVI_VM_CUR_LD : IType<23>;
-def TypeCVI_VM_VP_LDU : IType<24>;
-def TypeCVI_VM_ST : IType<25>;
-def TypeCVI_VM_NEW_ST : IType<26>;
-def TypeCVI_VM_STU : IType<27>;
-def TypeCVI_HIST : IType<28>;
-//----------------------------------------------------------------------------//
// Instruction Classes Definitions +
//----------------------------------------------------------------------------//
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 0a7dc6b49d00..b265a883da5c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -152,10 +152,11 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
/// On Hexagon, we have two instructions used to set-up the hardware loop
/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
/// to indicate the end of a loop.
-static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+static MachineInstr *findLoopInstr(MachineBasicBlock *BB, unsigned EndLoopOp,
+ MachineBasicBlock *TargetBB,
SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
- int LOOPi;
- int LOOPr;
+ unsigned LOOPi;
+ unsigned LOOPr;
if (EndLoopOp == Hexagon::ENDLOOP0) {
LOOPi = Hexagon::J2_loop0i;
LOOPr = Hexagon::J2_loop0r;
@@ -165,26 +166,24 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
}
// The loop set-up instruction will be in a predecessor block
- for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
- PE = BB->pred_end(); PB != PE; ++PB) {
+ for (MachineBasicBlock *PB : BB->predecessors()) {
    // If this has already been visited, skip it.
- if (!Visited.insert(*PB).second)
+ if (!Visited.insert(PB).second)
continue;
- if (*PB == BB)
+ if (PB == BB)
continue;
- for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
- E = (*PB)->instr_rend(); I != E; ++I) {
- int Opc = I->getOpcode();
+ for (auto I = PB->instr_rbegin(), E = PB->instr_rend(); I != E; ++I) {
+ unsigned Opc = I->getOpcode();
if (Opc == LOOPi || Opc == LOOPr)
return &*I;
- // We've reached a different loop, which means the loop0 has been removed.
- if (Opc == EndLoopOp)
+ // We've reached a different loop, which means the loop01 has been
+ // removed.
+ if (Opc == EndLoopOp && I->getOperand(0).getMBB() != TargetBB)
return nullptr;
}
// Check the predecessors for the LOOP instruction.
- MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
- if (loop)
- return loop;
+ if (MachineInstr *Loop = findLoopInstr(PB, EndLoopOp, TargetBB, Visited))
+ return Loop;
}
return nullptr;
}
@@ -597,7 +596,8 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, Cond[1].getMBB(),
+ VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
    // Add the ENDLOOP after finding the LOOP0.
@@ -637,7 +637,8 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, Cond[1].getMBB(),
+ VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
    // Add the ENDLOOP after finding the LOOP0.
@@ -687,7 +688,8 @@ unsigned HexagonInstrInfo::reduceLoopCount(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
DebugLoc DL = Cmp.getDebugLoc();
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(), VisitedBBs);
+ MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(),
+ Cmp.getOperand(0).getMBB(), VisitedBBs);
if (!Loop)
return 0;
// If the loop trip count is a compile-time value, then just change the
@@ -1074,13 +1076,13 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
MachineInstr *MI1New =
BuildMI(MBB, MI, DL, get(NewOpc))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addImm(MI.getOperand(1).getImm())
.addReg(SrcSubLo)
.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI1New->getOperand(0).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(1).getImm() + Offset)
.addReg(SrcSubHi)
@@ -1106,15 +1108,13 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
- MachineInstr *MI1New =
- BuildMI(MBB, MI, DL, get(NewOpc),
- HRI.getSubReg(DstReg, Hexagon::vsub_lo))
- .addOperand(MI.getOperand(1))
- .addImm(MI.getOperand(2).getImm());
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc),
+ HRI.getSubReg(DstReg, Hexagon::vsub_lo))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm());
MI1New->getOperand(1).setIsKill(false);
- BuildMI(MBB, MI, DL, get(NewOpc),
- HRI.getSubReg(DstReg, Hexagon::vsub_hi))
- .addOperand(MI.getOperand(1))
+ BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi))
+ .add(MI.getOperand(1))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(2).getImm() + Offset)
.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -1227,18 +1227,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
if (Op0.getReg() != Op2.getReg()) {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov))
- .addOperand(Op0)
- .addOperand(Op1)
- .addOperand(Op2);
+ .add(Op0)
+ .add(Op1)
+ .add(Op2);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
IsDestLive = true;
}
if (Op0.getReg() != Op3.getReg()) {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov))
- .addOperand(Op0)
- .addOperand(Op1)
- .addOperand(Op3);
+ .add(Op0)
+ .add(Op1)
+ .add(Op3);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
}
@@ -1259,10 +1259,10 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo);
unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
- .addOperand(Op0)
- .addOperand(Op1)
- .addReg(SrcHi)
- .addReg(SrcLo);
+ .add(Op0)
+ .add(Op1)
+ .addReg(SrcHi)
+ .addReg(SrcLo);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
IsDestLive = true;
@@ -1271,10 +1271,10 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo);
unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine))
- .addOperand(Op0)
- .addOperand(Op1)
- .addReg(SrcHi)
- .addReg(SrcLo);
+ .add(Op0)
+ .add(Op1)
+ .addReg(SrcHi)
+ .addReg(SrcLo);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
}
@@ -1376,7 +1376,7 @@ bool HexagonInstrInfo::PredicateInstruction(
MachineOperand &Op = MI.getOperand(NOp);
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
break;
- T.addOperand(Op);
+ T.add(Op);
NOp++;
}
@@ -1386,7 +1386,7 @@ bool HexagonInstrInfo::PredicateInstruction(
assert(GotPredReg);
T.addReg(PredReg, PredRegFlags);
while (NOp < NumOps)
- T.addOperand(MI.getOperand(NOp++));
+ T.add(MI.getOperand(NOp++));
MI.setDesc(get(PredOpc));
while (unsigned n = MI.getNumOperands())
@@ -1413,18 +1413,28 @@ bool HexagonInstrInfo::DefinesPredicate(
auto &HRI = getRegisterInfo();
for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) {
MachineOperand MO = MI.getOperand(oper);
- if (MO.isReg() && MO.isDef()) {
+ if (MO.isReg()) {
+ if (!MO.isDef())
+ continue;
const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg());
if (RC == &Hexagon::PredRegsRegClass) {
Pred.push_back(MO);
return true;
}
+ continue;
+ } else if (MO.isRegMask()) {
+ for (unsigned PR : Hexagon::PredRegsRegClass) {
+ if (!MI.modifiesRegister(PR, &HRI))
+ continue;
+ Pred.push_back(MO);
+ return true;
+ }
}
}
return false;
}
-bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const {
+bool HexagonInstrInfo::isPredicable(const MachineInstr &MI) const {
return MI.getDesc().isPredicable();
}
@@ -1715,7 +1725,7 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
// Return true if the instruction is a compound branch instruction.
bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const {
- return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch());
+ return getType(MI) == HexagonII::TypeCJ && MI.isBranch();
}
bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
@@ -3009,10 +3019,12 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
unsigned PredReg) const {
- for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) {
- const MachineOperand &MO = MI.getOperand(opNum);
+ for (const MachineOperand &MO : MI.operands()) {
+ // Predicate register must be explicitly defined.
+ if (MO.isRegMask() && MO.clobbersPhysReg(PredReg))
+ return false;
if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg))
- return false; // Predicate register must be explicitly defined.
+ return false;
}
// Hexagon Programmer's Reference says that decbin, memw_locked, and
@@ -3415,7 +3427,9 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
return NVOpcode;
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unknown .new type");
+ default:
+ llvm::report_fatal_error(std::string("Unknown .new type: ") +
+ std::to_string(MI.getOpcode()).c_str());
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
@@ -3456,20 +3470,75 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
// We assume that block can have at most two successors.
- bool taken = false;
const MachineBasicBlock *Src = MI.getParent();
const MachineOperand &BrTarget = MI.getOperand(1);
- const MachineBasicBlock *Dst = BrTarget.getMBB();
+ bool Taken = false;
+ const BranchProbability OneHalf(1, 2);
- const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
- if (Prediction >= BranchProbability(1,2))
- taken = true;
+ if (BrTarget.isMBB()) {
+ const MachineBasicBlock *Dst = BrTarget.getMBB();
+ Taken = MBPI->getEdgeProbability(Src, Dst) >= OneHalf;
+ } else {
+ // The branch target is not a basic block (most likely a function).
+ // Since BPI only gives probabilities for targets that are basic blocks,
+ // try to identify another target of this branch (potentially a fall-
+    // through) and check the probability of that target.
+ //
+ // The only handled branch combinations are:
+ // - one conditional branch,
+ // - one conditional branch followed by one unconditional branch.
+ // Otherwise, assume not-taken.
+ assert(MI.isConditionalBranch());
+ const MachineBasicBlock &B = *MI.getParent();
+ bool SawCond = false, Bad = false;
+ for (const MachineInstr &I : B) {
+ if (!I.isBranch())
+ continue;
+ if (I.isConditionalBranch()) {
+ SawCond = true;
+ if (&I != &MI) {
+ Bad = true;
+ break;
+ }
+ }
+ if (I.isUnconditionalBranch() && !SawCond) {
+ Bad = true;
+ break;
+ }
+ }
+ if (!Bad) {
+ MachineBasicBlock::const_instr_iterator It(MI);
+ MachineBasicBlock::const_instr_iterator NextIt = std::next(It);
+ if (NextIt == B.instr_end()) {
+ // If this branch is the last, look for the fall-through block.
+ for (const MachineBasicBlock *SB : B.successors()) {
+ if (!B.isLayoutSuccessor(SB))
+ continue;
+ Taken = MBPI->getEdgeProbability(Src, SB) < OneHalf;
+ break;
+ }
+ } else {
+ assert(NextIt->isUnconditionalBranch());
+ // Find the first MBB operand and assume it's the target.
+ const MachineBasicBlock *BT = nullptr;
+ for (const MachineOperand &Op : NextIt->operands()) {
+ if (!Op.isMBB())
+ continue;
+ BT = Op.getMBB();
+ break;
+ }
+ Taken = BT && MBPI->getEdgeProbability(Src, BT) < OneHalf;
+ }
+ } // if (!Bad)
+ }
+
+ // The Taken flag should be set to something reasonable by this point.
switch (MI.getOpcode()) {
case Hexagon::J2_jumpt:
- return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
+ return Taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
case Hexagon::J2_jumpf:
- return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
+ return Taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
default:
llvm_unreachable("Unexpected jump instruction.");
@@ -3479,26 +3548,46 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
// Return .new predicate version for an instruction.
int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
- int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
- if (NewOpcode >= 0) // Valid predicate new instruction
- return NewOpcode;
-
switch (MI.getOpcode()) {
  // Conditional Jumps
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
return getDotNewPredJumpOp(MI, MBPI);
-
- default:
- assert(0 && "Unknown .new type");
}
- return 0;
+
+ int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
+ if (NewOpcode >= 0)
+ return NewOpcode;
+
+ dbgs() << "Cannot convert to .new: " << getName(MI.getOpcode()) << '\n';
+ llvm_unreachable(nullptr);
}
-int HexagonInstrInfo::getDotOldOp(const int opc) const {
- int NewOp = opc;
+int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
+ int NewOp = MI.getOpcode();
if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
NewOp = Hexagon::getPredOldOpcode(NewOp);
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
+ // All Hexagon architectures have prediction bits on dot-new branches,
+ // but only Hexagon V60+ has prediction bits on dot-old ones. Make sure
+ // to pick the right opcode when converting back to dot-old.
+ if (!HST.getFeatureBits()[Hexagon::ArchV60]) {
+ switch (NewOp) {
+ case Hexagon::J2_jumptpt:
+ NewOp = Hexagon::J2_jumpt;
+ break;
+ case Hexagon::J2_jumpfpt:
+ NewOp = Hexagon::J2_jumpf;
+ break;
+ case Hexagon::J2_jumprtpt:
+ NewOp = Hexagon::J2_jumprt;
+ break;
+ case Hexagon::J2_jumprfpt:
+ NewOp = Hexagon::J2_jumprf;
+ break;
+ }
+ }
assert(NewOp >= 0 &&
"Couldn't change predicate new instruction to its old form.");
}
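A minimal sketch of what the signature change means for a caller: getDotOldOp now takes the MachineInstr (so it can reach the parent function's subtarget for the ArchV60 prediction-bit check), and a caller that demotes a .new instruction back to its dot-old form simply passes the instruction and rewrites its descriptor. The function name below is illustrative, not quoted from this patch:

  #include "HexagonInstrInfo.h"
  #include "llvm/CodeGen/MachineInstr.h"

  // Demote a .new-form instruction back to its dot-old form (sketch).
  static void demoteToDotOld(llvm::MachineInstr &MI,
                             const llvm::HexagonInstrInfo *HII) {
    // The V60 prediction-bit handling now lives inside getDotOldOp.
    int NewOpcode = HII->getDotOldOp(MI);
    MI.setDesc(HII->get(NewOpcode));
  }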
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2358d4b7e4c0..b268c7a28171 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -235,7 +235,7 @@ public:
/// Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
/// Test if the given instruction should be considered a scheduling boundary.
/// This primarily includes labels and terminators.
@@ -404,7 +404,7 @@ public:
const MachineBranchProbabilityInfo *MBPI) const;
int getDotNewPredOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const;
- int getDotOldOp(const int opc) const;
+ int getDotOldOp(const MachineInstr &MI) const;
HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr &MI)
const;
short getEquivalentHWInstr(const MachineInstr &MI) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
deleted file mode 100644
index c5719ad5b6d8..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ /dev/null
@@ -1,4799 +0,0 @@
-//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrFormats.td"
-include "HexagonOperands.td"
-include "HexagonInstrEnc.td"
-
-//===----------------------------------------------------------------------===//
-// Compare
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isCompare = 1, InputType = "imm", isExtendable = 1,
- opExtendable = 2 in
-class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp>
- : ALU32Inst <(outs PredRegs:$dst),
- (ins IntRegs:$src1, ImmOp:$src2),
- "$dst = "#!if(isNot, "!","")#mnemonic#"($src1, #$src2)",
- [], "",ALU32_2op_tc_2early_SLOT0123 >, ImmRegRel {
- bits<2> dst;
- bits<5> src1;
- bits<10> src2;
- let CextOpcode = mnemonic;
- let opExtentBits = !if(!eq(mnemonic, "cmp.gtu"), 9, 10);
- let isExtentSigned = !if(!eq(mnemonic, "cmp.gtu"), 0, 1);
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0101;
- let Inst{23-22} = MajOp;
- let Inst{21} = !if(!eq(mnemonic, "cmp.gtu"), 0, src2{9});
- let Inst{20-16} = src1;
- let Inst{13-5} = src2{8-0};
- let Inst{4} = isNot;
- let Inst{3-2} = 0b00;
- let Inst{1-0} = dst;
- }
-
-def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10_0Ext>;
-def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10_0Ext>;
-def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9_0Ext>;
-
-//===----------------------------------------------------------------------===//
-// ALU32/ALU +
-//===----------------------------------------------------------------------===//
-// Add.
-
-let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
-class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
- bit IsComm>
- : ALU32_rr<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#mnemonic#"($Rs, $Rt)",
- [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredRel {
- let isCommutable = IsComm;
- let BaseOpcode = mnemonic#_rr;
- let CextOpcode = mnemonic;
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
-
- let IClass = 0b1111;
- let Inst{27} = 0b0;
- let Inst{26-24} = MajOp;
- let Inst{23-21} = MinOp;
- let Inst{20-16} = !if(OpsRev,Rt,Rs);
- let Inst{12-8} = !if(OpsRev,Rs,Rt);
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev, bit PredNot, bit PredNew>
- : ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
- "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") "#
- "$Rd = "#mnemonic#"($Rs, $Rt)",
- [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
- let isPredicated = 1;
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = PredNew;
- let BaseOpcode = mnemonic#_rr;
- let CextOpcode = mnemonic;
-
- bits<2> Pu;
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
-
- let IClass = 0b1111;
- let Inst{27} = 0b1;
- let Inst{26-24} = MajOp;
- let Inst{23-21} = MinOp;
- let Inst{20-16} = !if(OpsRev,Rt,Rs);
- let Inst{13} = PredNew;
- let Inst{12-8} = !if(OpsRev,Rs,Rt);
- let Inst{7} = PredNot;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rd;
-}
-
-class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev>
- : T_ALU32_3op<"", MajOp, MinOp, OpsRev, 0> {
- let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")";
-}
-
-def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>;
-def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>;
-def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>;
-def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>;
-
-class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
- bits<3> MinOp, bit OpsRev, bit IsComm>
- : T_ALU32_3op<"", MajOp, MinOp, OpsRev, IsComm> {
- let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix;
-}
-
-def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>;
-def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>;
-
-let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in {
- def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>;
- def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>;
- def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>;
- def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>;
- def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>;
- def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>;
-}
-
-let Itinerary = ALU32_3op_tc_2_SLOT0123 in
-def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>;
-
-def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>;
-def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>;
-
-multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev> {
- def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
- def f : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 0>;
- def tnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 1>;
- def fnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 1>;
-}
-
-multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev, bit IsComm> {
- let isPredicable = 1 in
- def A2_#NAME : T_ALU32_3op <mnemonic, MajOp, MinOp, OpsRev, IsComm>;
- defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
-}
-
-defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
-defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
-defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
-defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
-defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
-
-// A few special cases producing register pairs:
-let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
- def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
-
- let isPredicable = 1 in
- def A2_combinew : T_ALU32_3op <"combine", 0b101, 0b000, 0, 0>;
-
- // Conditional combinew uses "newt/f" instead of "t/fnew".
- def C2_ccombinewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>;
- def C2_ccombinewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>;
- def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>;
- def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
-}
-
-let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
-class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
- : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#mnemonic#"($Rs, $Rt)",
- [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
- let CextOpcode = mnemonic;
- let isCommutable = IsComm;
- bits<5> Rs;
- bits<5> Rt;
- bits<2> Pd;
-
- let IClass = 0b1111;
- let Inst{27-24} = 0b0010;
- let Inst{22-21} = MinOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4} = IsNeg;
- let Inst{3-2} = 0b00;
- let Inst{1-0} = Pd;
-}
-
-let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
- def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>;
- def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>;
- def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
-}
-
-let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in
-def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
- bits<5> Rt;
-
- let CextOpcode = "mux";
- let InputType = "reg";
- let hasSideEffects = 0;
- let IClass = 0b1111;
-
- let Inst{27-24} = 0b0100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rd;
-}
-
-// Combines the two immediates into a double register.
-// Increase complexity to make it greater than any complexity of a combine
-// that involves a register.
-
-let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
- isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1,
- AddedComplexity = 75 in
-def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8_0Ext:$s8, s8_0Imm:$S8),
- "$Rdd = combine(#$s8, #$S8)",
- []> {
- bits<5> Rdd;
- bits<8> s8;
- bits<8> S8;
-
- let IClass = 0b0111;
- let Inst{27-23} = 0b11000;
- let Inst{22-16} = S8{7-1};
- let Inst{13} = S8{0};
- let Inst{12-5} = s8;
- let Inst{4-0} = Rdd;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated ADD of a reg and an Immediate value.
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_Addri_Pred <bit PredNot, bit PredNew>
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8),
- !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ",
- ") $Rd = ")#"add($Rs, #$s8)"> {
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
- bits<8> s8;
-
- let isPredicatedNew = PredNew;
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0100;
- let Inst{23} = PredNot;
- let Inst{22-21} = Pu;
- let Inst{20-16} = Rs;
- let Inst{13} = PredNew;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
- }
-
-//===----------------------------------------------------------------------===//
-// A2_addi: Add a signed immediate to a register.
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_Addri <Operand immOp>
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, immOp:$s16),
- "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> {
- bits<5> Rd;
- bits<5> Rs;
- bits<16> s16;
-
- let IClass = 0b1011;
-
- let Inst{27-21} = s16{15-9};
- let Inst{20-16} = Rs;
- let Inst{13-5} = s16{8-0};
- let Inst{4-0} = Rd;
- }
-
-//===----------------------------------------------------------------------===//
-// Multiclass for ADD of a register and an immediate value.
-//===----------------------------------------------------------------------===//
-multiclass Addri_Pred<string mnemonic, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- def NAME : T_Addri_Pred<PredNot, 0>;
- // Predicate new
- def NAME#new : T_Addri_Pred<PredNot, 1>;
- }
-}
-
-let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in
-multiclass Addri_base<string mnemonic, SDNode OpNode> {
- let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in {
- let opExtendable = 2, opExtentBits = 16, isPredicable = 1, isAdd = 1 in
- def A2_#NAME : T_Addri<s16_0Ext>;
-
- let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in {
- defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>;
- defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>;
- }
- }
-}
-
-defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
-
-let hasNewValue = 1, hasSideEffects = 0, isPseudo = 1 in
-def A2_iconst
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins s23_2Imm:$s23_2),
- "$Rd = iconst(#$s23_2)"> {}
-
-//===----------------------------------------------------------------------===//
-// Template class used for the following ALU32 instructions.
-// Rd=and(Rs,#s10)
-// Rd=or(Rs,#s10)
-//===----------------------------------------------------------------------===//
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
-InputType = "imm", hasNewValue = 1 in
-class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s10_0Ext:$s10),
- "$Rd = "#mnemonic#"($Rs, #$s10)" ,
- []> {
- bits<5> Rd;
- bits<5> Rs;
- bits<10> s10;
- let CextOpcode = mnemonic;
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0110;
- let Inst{23-22} = MinOp;
- let Inst{21} = s10{9};
- let Inst{20-16} = Rs;
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Rd;
- }
-
-def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
-def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
-
-// Subtract register from immediate
-// Rd32=sub(#s10,Rs32)
-let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1,
- opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in
-def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10_0Ext:$s10, IntRegs:$Rs),
- "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel {
- bits<5> Rd;
- bits<10> s10;
- bits<5> Rs;
-
- let IClass = 0b0111;
-
- let Inst{27-22} = 0b011001;
- let Inst{21} = s10{9};
- let Inst{20-16} = Rs;
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Rd;
- }
-
-// Nop.
-let hasSideEffects = 0 in
-def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
- let IClass = 0b0111;
- let Inst{27-24} = 0b1111;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_tfr16<bit isHi>
- : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16_0Imm:$u16),
- "$Rx"#!if(isHi, ".h", ".l")#" = #$u16",
- [], "$src1 = $Rx" > {
- bits<5> Rx;
- bits<16> u16;
-
- let IClass = 0b0111;
- let Inst{27-26} = 0b00;
- let Inst{25-24} = !if(isHi, 0b10, 0b01);
- let Inst{23-22} = u16{15-14};
- let Inst{21} = 0b1;
- let Inst{20-16} = Rx;
- let Inst{13-0} = u16{13-0};
- }
-
-def A2_tfril: T_tfr16<0>;
-def A2_tfrih: T_tfr16<1>;
-
-// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)".
-let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in
-class T_tfr_pred<bit isPredNot, bit isPredNew>
- : ALU32Inst<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ("#!if(isPredNot, "!", "")#
- "$src1"#!if(isPredNew, ".new", "")#
- ") $dst = $src2"> {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
-
- let isPredicatedFalse = isPredNot;
- let isPredicatedNew = isPredNew;
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0100;
- let Inst{23} = isPredNot;
- let Inst{13} = isPredNew;
- let Inst{12-5} = 0;
- let Inst{4-0} = dst;
- let Inst{22-21} = src1;
- let Inst{20-16} = src2;
- }
-
-let isPredicable = 1 in
-class T_tfr : ALU32Inst<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = $src"> {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b0111;
-
- let Inst{27-21} = 0b0000011;
- let Inst{20-16} = src;
- let Inst{13} = 0b0;
- let Inst{4-0} = dst;
- }
-
-let InputType = "reg", hasNewValue = 1, hasSideEffects = 0 in
-multiclass tfr_base<string CextOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp in {
- def NAME : T_tfr;
-
- // Predicate
- def t : T_tfr_pred<0, 0>;
- def f : T_tfr_pred<1, 0>;
- // Predicate new
- def tnew : T_tfr_pred<0, 1>;
- def fnew : T_tfr_pred<1, 1>;
- }
-}
-
-// Assembler mapped to C2_ccombinew[t|f|newt|newf].
-// Please don't add bits to this instruction as it'll be converted into
-// 'combine' before object code emission.
-let isPredicated = 1 in
-class T_tfrp_pred<bit PredNot, bit PredNew>
- : ALU32_rr <(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2),
- "if ("#!if(PredNot, "!", "")#"$src1"
- #!if(PredNew, ".new", "")#") $dst = $src2" > {
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = PredNew;
- }
-
-// Assembler mapped to A2_combinew.
-// Please don't add bits to this instruction as it'll be converted into
-// 'combine' before object code emission.
-class T_tfrp : ALU32Inst <(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src),
- "$dst = $src">;
-
-let hasSideEffects = 0 in
-multiclass TFR64_base<string BaseName> {
- let BaseOpcode = BaseName in {
- let isPredicable = 1 in
- def NAME : T_tfrp;
- // Predicate
- def t : T_tfrp_pred <0, 0>;
- def f : T_tfrp_pred <1, 0>;
- // Predicate new
- def tnew : T_tfrp_pred <0, 1>;
- def fnew : T_tfrp_pred <1, 1>;
- }
-}
-
-let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12,
- isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR",
- hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in
-class T_TFRI_Pred<bit PredNot, bit PredNew>
- : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12_0Ext:$s12),
- "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12",
- [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = PredNew;
-
- bits<5> Rd;
- bits<2> Pu;
- bits<12> s12;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b1110;
- let Inst{23} = PredNot;
- let Inst{22-21} = Pu;
- let Inst{20} = 0b0;
- let Inst{19-16,12-5} = s12;
- let Inst{13} = PredNew;
- let Inst{4-0} = Rd;
-}
-
-def C2_cmoveit : T_TFRI_Pred<0, 0>;
-def C2_cmoveif : T_TFRI_Pred<1, 0>;
-def C2_cmovenewit : T_TFRI_Pred<0, 1>;
-def C2_cmovenewif : T_TFRI_Pred<1, 1>;
-
-let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
- CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
- isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
- isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
-def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16_0Ext:$s16), "$Rd = #$s16",
- [], "", ALU32_2op_tc_1_SLOT0123>,
- ImmRegRel, PredRel {
- bits<5> Rd;
- bits<16> s16;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b1000;
- let Inst{23-22,20-16,13-5} = s16;
- let Inst{4-0} = Rd;
-}
-
-defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel;
-let isAsmParserOnly = 1 in
-defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;
-
-// Assembler mapped
-let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
- isAsmParserOnly = 1 in
-def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8_0Imm64:$src1),
- "$dst = #$src1",
- []>;
-
-// TODO: see if this instruction can be deleted..
-let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
- isAsmParserOnly = 1 in {
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64_0Imm:$src1),
- "$dst = #$src1">;
-def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
- (ins s8_0Ext:$src1, s8_0Imm:$src2),
- "$dst = combine(##$src1, #$src2)">;
-}
-
-//===----------------------------------------------------------------------===//
-// ALU32/ALU -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM +
-//===----------------------------------------------------------------------===//
-// Scalar mux register immediate.
-let hasSideEffects = 0, isExtentSigned = 1, CextOpcode = "MUX",
- InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 8 in
-class T_MUX1 <bit MajOp, dag ins, string AsmStr>
- : ALU32Inst <(outs IntRegs:$Rd), ins, AsmStr>, ImmRegRel {
- bits<5> Rd;
- bits<2> Pu;
- bits<8> s8;
- bits<5> Rs;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b0011;
- let Inst{23} = MajOp;
- let Inst{22-21} = Pu;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
-}
-
-let opExtendable = 2 in
-def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8_0Ext:$s8, IntRegs:$Rs),
- "$Rd = mux($Pu, #$s8, $Rs)">;
-
-let opExtendable = 3 in
-def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8),
- "$Rd = mux($Pu, $Rs, #$s8)">;
-
-// C2_muxii: Scalar mux immediates.
-let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 8, opExtendable = 2 in
-def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, s8_0Ext:$s8, s8_0Imm:$S8),
- "$Rd = mux($Pu, #$s8, #$S8)" ,
- []> {
- bits<5> Rd;
- bits<2> Pu;
- bits<8> s8;
- bits<8> S8;
-
- let IClass = 0b0111;
-
- let Inst{27-25} = 0b101;
- let Inst{24-23} = Pu;
- let Inst{22-16} = S8{7-1};
- let Inst{13} = S8{0};
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
- }
-
-let isCodeGenOnly = 1, isPseudo = 1 in
-def PS_pselect : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
- ".error \"should not emit\" ", []>;
-
-
-//===----------------------------------------------------------------------===//
-// template class for non-predicated alu32_2op instructions
-// - aslh, asrh, sxtb, sxth, zxth
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, opNewValue = 0 in
-class T_ALU32_2op <string mnemonic, bits<3> minOp> :
- ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)", [] > {
- bits<5> Rd;
- bits<5> Rs;
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0000;
- let Inst{23-21} = minOp;
- let Inst{13} = 0b0;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
-}
-
-//===----------------------------------------------------------------------===//
-// template class for predicated alu32_2op instructions
-// - aslh, asrh, sxtb, sxth, zxtb, zxth
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
- bit isPredNew > :
- ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
- !if(isPredNot, "if (!$Pu", "if ($Pu")
- #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0000;
- let Inst{23-21} = minOp;
- let Inst{13} = 0b1;
- let Inst{11} = isPredNot;
- let Inst{10} = isPredNew;
- let Inst{4-0} = Rd;
- let Inst{9-8} = Pu;
- let Inst{20-16} = Rs;
-}
-
-multiclass ALU32_2op_Pred<string mnemonic, bits<3> minOp, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- def NAME : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 0>;
-
- // Predicate new
- let isPredicatedNew = 1 in
- def NAME#new : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 1>;
- }
-}
-
-multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> {
- let BaseOpcode = mnemonic in {
- let isPredicable = 1, hasSideEffects = 0 in
- def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
-
- let isPredicated = 1, hasSideEffects = 0 in {
- defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
- defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
- }
- }
-}
-
-defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
-defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
-defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
-defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
-defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
-
-// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
-// Compiler would want to generate 'zxtb' instead of 'and' because 'zxtb' has
-// predicated forms while 'and' doesn't. Since integrated assembler can't
-// handle 'mapped' instructions, we need to encode 'zxtb' same as 'and' where
-// immediate operand is set to '255'.
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255)
- bits<5> Rd;
- bits<5> Rs;
- bits<10> s10 = 255;
-
- let IClass = 0b0111;
-
- let Inst{27-22} = 0b011000;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
- let Inst{21} = s10{9};
- let Inst{13-5} = s10{8-0};
-}
-
-//Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255)
-multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
- let BaseOpcode = mnemonic in {
- let isPredicable = 1, hasSideEffects = 0 in
- def A2_#NAME : T_ZXTB;
-
- let isPredicated = 1, hasSideEffects = 0 in {
- defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
- defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
- }
- }
-}
-
-defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
-
-//===----------------------------------------------------------------------===//
-// Template class for vector add and avg
-//===----------------------------------------------------------------------===//
-
-class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
- bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
- : ALU64_rr < (outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
- #!if(isCrnd,":crnd","")
- #!if(isSat, ":sat", ""),
- [], "", ALU64_tc_2_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b0011;
- let Inst{23-21} = majOp;
- let Inst{20-16} = !if (SwapOps, Rtt, Rss);
- let Inst{12-8} = !if (SwapOps, Rss, Rtt);
- let Inst{7-5} = minOp;
- let Inst{4-0} = Rdd;
- }
-
-// ALU64 - Vector add
-// Rdd=vadd[u][bhw](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
- def A2_vaddh : T_VectALU_64 < "vaddh", 0b000, 0b010, 0, 0, 0, 0>;
- def A2_vaddw : T_VectALU_64 < "vaddw", 0b000, 0b101, 0, 0, 0, 0>;
-}
-
-// Rdd=vadd[u][bhw](Rss,Rtt):sat
-let Defs = [USR_OVF] in {
- def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
- def A2_vaddhs : T_VectALU_64 < "vaddh", 0b000, 0b011, 1, 0, 0, 0>;
- def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
- def A2_vaddws : T_VectALU_64 < "vaddw", 0b000, 0b110, 1, 0, 0, 0>;
-}
-
-// ALU64 - Vector average
-// Rdd=vavg[u][bhw](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>;
- def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>;
- def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>;
- def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>;
- def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>;
-}
-
-// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd]
-def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>;
-def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>;
-def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>;
-def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>;
-
-def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>;
-def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>;
-def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>;
-
-// Rdd=vnavg[bh](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>;
- def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>;
-}
-
-// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat
-let Defs = [USR_OVF] in {
- def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>;
- def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>;
- def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>;
- def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>;
-}
-
-// Rdd=vsub[u][bh](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>;
- def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>;
- def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>;
-}
-
-// Rdd=vsub[u][bh](Rss,Rtt):sat
-let Defs = [USR_OVF] in {
- def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>;
- def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>;
- def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>;
- def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>;
-}
-
-// Rdd=vmax[u][bhw](Rss,Rtt)
-def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>;
-def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>;
-def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>;
-def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>;
-def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>;
-def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>;
-
-// Rdd=vmin[u][bhw](Rss,Rtt)
-def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>;
-def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>;
-def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>;
-def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>;
-def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>;
-def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>;
-
-//===----------------------------------------------------------------------===//
-// Template class for vector compare
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_vcmp <string Str, bits<4> minOp>
- : ALU64_rr <(outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Pd = "#Str#"($Rss, $Rtt)", [],
- "", ALU64_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00100;
- let Inst{13} = minOp{3};
- let Inst{7-5} = minOp{2-0};
- let Inst{1-0} = Pd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector compare bytes
-def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>;
-def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>;
-
-// Vector compare halfwords
-def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>;
-def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>;
-def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>;
-
-// Vector compare words
-def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>;
-def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>;
-def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>;
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PRED +
-//===----------------------------------------------------------------------===//
-// No bits needed. If cmp.ge is found the assembler parser will
-// transform it to cmp.gt subtracting 1 from the immediate.
-let isPseudo = 1 in {
-def C2_cmpgei: ALU32Inst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8_0Ext:$s8),
- "$Pd = cmp.ge($Rs, #$s8)">;
-def C2_cmpgeui: ALU32Inst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8_0Ext:$s8),
- "$Pd = cmp.geu($Rs, #$s8)">;
-}
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PRED -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU +
-//===----------------------------------------------------------------------===//
-// Add.
-//===----------------------------------------------------------------------===//
-// Template Class
-// Add/Subtract halfword
-// Rd=add(Rt.L,Rs.[HL])[:sat]
-// Rd=sub(Rt.L,Rs.[HL])[:sat]
-// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
-// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub>
- : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
- "$Rd = "#!if(isSub,"sub","add")#"($Rt."
- #!if(hasShift, !if(LHbits{1},"h","l"),"l") #", $Rs."
- #!if(hasShift, !if(LHbits{0},"h)","l)"), !if(LHbits{1},"h)","l)"))
- #!if(isSat,":sat","")
- #!if(hasShift,":<<16",""), [], "", ALU64_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rt;
- bits<5> Rs;
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b01010;
- let Inst{22} = hasShift;
- let Inst{21} = isSub;
- let Inst{7} = isSat;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rd;
- let Inst{12-8} = Rt;
- let Inst{20-16} = Rs;
- }
-
-//Rd=sub(Rt.L,Rs.[LH])
-def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>;
-def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>;
-
-//Rd=add(Rt.L,Rs.[LH])
-def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>;
-def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>;
-
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
- //Rd=sub(Rt.L,Rs.[LH]):sat
- def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>;
- def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>;
-
- //Rd=add(Rt.L,Rs.[LH]):sat
- def A2_addh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 0>;
- def A2_addh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 0>;
-}
-
-//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>;
-def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>;
-def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>;
-def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):<<16
-def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>;
-def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>;
-def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>;
-def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>;
-
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
- //Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
- def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>;
- def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>;
- def A2_subh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 1>;
- def A2_subh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 1>;
-
- //Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
- def A2_addh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 0>;
- def A2_addh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 0>;
- def A2_addh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 0>;
- def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
- (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0000;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let hasNewValue = 1, opNewValue = 0, hasSideEffects = 0 in
-class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned >
- : ALU64Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
- "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
- #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rt;
- bits<5> Rs;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b01011;
- let Inst{22-21} = !if(isMax, 0b10, 0b01);
- let Inst{7} = isUnsigned;
- let Inst{4-0} = Rd;
- let Inst{12-8} = !if(isMax, Rs, Rt);
- let Inst{20-16} = !if(isMax, Rt, Rs);
- }
-
-def A2_min : T_XTYPE_MIN_MAX < 0, 0 >;
-def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
-def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
-def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
-
-class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
- : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> {
- let isCompare = 1;
- let isCommutable = IsComm;
- let hasSideEffects = 0;
-
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0010100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{1-0} = Pd;
-}
-
-def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>;
-def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
-def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
-
-def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
- let hasSideEffects = 0;
-
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0001;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rd;
-}
-
-class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
- bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
- string Op2Pfx>
- : ALU64_rr<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Rd = " #mnemonic# "($Rs, " #Op2Pfx# "$Rt)" #suffix, [],
- "", ALU64_tc_1_SLOT23> {
- let hasSideEffects = 0;
- let isCommutable = IsComm;
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
-
- let IClass = 0b1101;
- let Inst{27-24} = RegType;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = !if (OpsRev,Rt,Rs);
- let Inst{12-8} = !if (OpsRev,Rs,Rt);
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
- bit OpsRev, bit IsComm>
- : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
- IsComm, "">;
-
-let isAdd = 1 in
-def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
-def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
-
-class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
- bit IsNeg>
- : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
- !if(IsNeg,"~","")>;
-
-def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
-def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
-def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/BIT +
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-// ALU64/BIT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/PERM +
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-// ALU64/PERM -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// CR +
-//===----------------------------------------------------------------------===//
-// Logical reductions on predicates.
-
-// Looping instructions.
-
-// Pipelined looping instructions.
-
-// Logical operations on predicates.
-let hasSideEffects = 0 in
-class T_LOGICAL_1OP<string MnOp, bits<2> OpBits>
- : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps),
- "$Pd = " # MnOp # "($Ps)", [], "", CR_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<2> Ps;
-
- let IClass = 0b0110;
- let Inst{27-23} = 0b10111;
- let Inst{22-21} = OpBits;
- let Inst{20} = 0b0;
- let Inst{17-16} = Ps;
- let Inst{13} = 0b0;
- let Inst{1-0} = Pd;
-}
-
-def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
-def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
-def C2_not : T_LOGICAL_1OP<"not", 0b10>;
-
-let hasSideEffects = 0 in
-class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
- : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt),
- "$Pd = " # MnOp # "($Ps, " # !if (IsNeg,"!","") # "$Pt)",
- [], "", CR_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<2> Ps;
- bits<2> Pt;
-
- let IClass = 0b0110;
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = OpBits;
- let Inst{20} = 0b0;
-  let Inst{17-16} = !if(Rev,Pt,Ps); // Ps and Pt are reversed for some
- let Inst{13} = 0b0; // instructions.
- let Inst{9-8} = !if(Rev,Ps,Pt);
- let Inst{1-0} = Pd;
-}
-
-def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>;
-def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>;
-def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
-def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
-def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
- "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<2> Ps;
- bits<2> Pt;
-
- let IClass = 0b1000;
- let Inst{27-24} = 0b1001;
- let Inst{22-21} = 0b00;
- let Inst{17-16} = Ps;
- let Inst{9-8} = Pt;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0 in
-def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
- "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<2> Pt;
-
- let IClass = 0b1000;
- let Inst{27-24} = 0b0110;
- let Inst{9-8} = Pt;
- let Inst{4-0} = Rd;
-}
-
-// User control register transfer.
-//===----------------------------------------------------------------------===//
-// CR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// JR +
-//===----------------------------------------------------------------------===//
-
-class CondStr<string CReg, bit True, bit New> {
- string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
-}
-class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
- string S = Mnemonic # !if(Taken, ":t", ":nt");
-}
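-// For example (derived from the two helper classes above):
-//   CondStr<"$src", 0, 1>.S    yields "if (!$src.new) "
-//   JumpOpcStr<"jump", 1, 1>.S yields "jump:t"
-// so a predicated jump prints as "if (!$src.new) jump:t $dst".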
-
-let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
- isPredicable = 1,
- isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
- opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in
-class T_JMP<string ExtStr>
- : JInst_CJUMP_UCJUMP<(outs), (ins brtarget:$dst),
- "jump " # ExtStr # "$dst",
- [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> {
- bits<24> dst;
- let IClass = 0b0101;
-
- let Inst{27-25} = 0b100;
- let Inst{24-16} = dst{23-15};
- let Inst{13-1} = dst{14-2};
-}
-
-let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1,
- isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
- opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in
-class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
- : JInst_CJUMP_UCJUMP<(outs), (ins PredRegs:$src, brtarget:$dst),
- CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
- JumpOpcStr<"jump", isPredNew, isTak>.S # " " #
- ExtStr # "$dst",
- [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT>, ImmRegRel {
- let isTaken = isTak;
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = isPredNew;
- bits<2> src;
- bits<17> dst;
-
- let IClass = 0b0101;
-
- let Inst{27-24} = 0b1100;
- let Inst{21} = PredNot;
- let Inst{12} = isTak;
- let Inst{11} = isPredNew;
- let Inst{9-8} = src;
- let Inst{23-22} = dst{16-15};
- let Inst{20-16} = dst{14-10};
- let Inst{13} = dst{9};
- let Inst{7-1} = dst{8-2};
- }
-
-multiclass JMP_Pred<bit PredNot, string ExtStr> {
- def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken
- // Predicate new
- def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken
- def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken
-}
-
-multiclass JMP_base<string BaseOp, string ExtStr> {
- let BaseOpcode = BaseOp in {
- def NAME : T_JMP<ExtStr>;
- defm t : JMP_Pred<0, ExtStr>;
- defm f : JMP_Pred<1, ExtStr>;
- }
-}
-
-// Jump to the address stored in a register (JUMPR_MISC).
-// if ([[!]P[.new]]) jumpr[:t/nt] Rs
-let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC],
- isPredicable = 1, hasSideEffects = 0, InputType = "reg" in
-class T_JMPr
- : JRInst<(outs), (ins IntRegs:$dst),
- "jumpr $dst", [], "", J_tc_2early_SLOT2> {
- bits<5> dst;
-
- let IClass = 0b0101;
- let Inst{27-21} = 0b0010100;
- let Inst{20-16} = dst;
-}
-
-let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
- hasSideEffects = 0, InputType = "reg" in
-class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
- : JRInst <(outs), (ins PredRegs:$src, IntRegs:$dst),
- CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
- JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", [],
- "", J_tc_2early_SLOT2> {
-
- let isTaken = isTak;
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = isPredNew;
- bits<2> src;
- bits<5> dst;
-
- let IClass = 0b0101;
-
- let Inst{27-22} = 0b001101;
- let Inst{21} = PredNot;
- let Inst{20-16} = dst;
- let Inst{12} = isTak;
- let Inst{11} = isPredNew;
- let Inst{9-8} = src;
-}
-
-multiclass JMPR_Pred<bit PredNot> {
- def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken
- // Predicate new
- def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
- def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
-}
-
-multiclass JMPR_base<string BaseOp> {
- let BaseOpcode = BaseOp in {
- def NAME : T_JMPr;
- defm t : JMPR_Pred<0>;
- defm f : JMPR_Pred<1>;
- }
-}
-
-let isCall = 1, hasSideEffects = 1 in
-class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
- dag InputDag = (ins IntRegs:$Rs)>
- : JRInst<(outs), InputDag,
- !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
- "if ($Pu) callr $Rs"),
- "callr $Rs"),
- [], "", J_tc_2early_SLOT2> {
- bits<5> Rs;
- bits<2> Pu;
- let isPredicated = isPred;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b0101;
- let Inst{27-25} = 0b000;
- let Inst{24-23} = !if (isPred, 0b10, 0b01);
- let Inst{22} = 0;
- let Inst{21} = isPredNot;
- let Inst{9-8} = !if (isPred, Pu, 0b00);
- let Inst{20-16} = Rs;
-
- }
-
-let Defs = VolatileV3.Regs in {
- def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
- def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
-}
-
-let isTerminator = 1, hasSideEffects = 0 in {
- defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
-
- defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel;
-
- let isReturn = 1, isPseudo = 1, isCodeGenOnly = 1 in
- defm PS_jmpret : JMPR_base<"JMPret">, PredNewRel;
-}
-
-let validSubTargets = HasV60SubT in
-multiclass JMPpt_base<string BaseOp> {
- let BaseOpcode = BaseOp in {
- def tpt : T_JMP_c <0, 0, 1, "">; // Predicate true - taken
- def fpt : T_JMP_c <1, 0, 1, "">; // Predicate false - taken
- }
-}
-
-let validSubTargets = HasV60SubT in
-multiclass JMPRpt_base<string BaseOp> {
- let BaseOpcode = BaseOp in {
- def tpt : T_JMPr_c<0, 0, 1>; // predicate true - taken
- def fpt : T_JMPr_c<1, 0, 1>; // predicate false - taken
- }
-}
-
-defm J2_jumpr : JMPRpt_base<"JMPr">;
-defm J2_jump : JMPpt_base<"JMP">;
-
-// A return through builtin_eh_return.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
- isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
-def EH_RETURN_JMPR : T_JMPr;
-
-//===----------------------------------------------------------------------===//
-// JR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// LD +
-//===----------------------------------------------------------------------===//
-
-// Load - Base with Immediate offset addressing mode
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in
-class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
- Operand ImmOp>
- : LDInst<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
- "$dst = "#mnemonic#"($src1 + #$offset)", []>, AddrModeRel {
- bits<4> name;
- bits<5> dst;
- bits<5> src1;
- bits<14> offset;
- bits<11> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), offset{13-3},
- !if (!eq(ImmOpStr, "s11_2Ext"), offset{12-2},
- !if (!eq(ImmOpStr, "s11_1Ext"), offset{11-1},
- /* s11_0Ext */ offset{10-0})));
- let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
- !if (!eq(ImmOpStr, "s11_2Ext"), 13,
- !if (!eq(ImmOpStr, "s11_1Ext"), 12,
- /* s11_0Ext */ 11)));
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13-5} = offsetBits{8-0};
- let Inst{4-0} = dst;
- }
-
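-// Note on the class above: the immediate offset is encoded scaled by the
-// access size, e.g. s11_3Ext keeps offset{13-3} (memd offsets are multiples
-// of 8), s11_2Ext keeps offset{12-2} (multiples of 4), down to the unscaled
-// s11_0Ext byte form.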
-let opExtendable = 3, isExtentSigned = 0, isPredicated = 1 in
-class T_pload_io <string mnemonic, RegisterClass RC, bits<4>MajOp,
- Operand ImmOp, bit isNot, bit isPredNew>
- : LDInst<(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- "if ("#!if(isNot, "!$src1", "$src1")
- #!if(isPredNew, ".new", "")
- #") $dst = "#mnemonic#"($src2 + #$offset)",
- [],"", V2LDST_tc_ld_SLOT01> , AddrModeRel {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<9> offset;
- bits<6> offsetBits;
- string ImmOpStr = !cast<string>(ImmOp);
-
- let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), offset{8-3},
- !if (!eq(ImmOpStr, "u6_2Ext"), offset{7-2},
- !if (!eq(ImmOpStr, "u6_1Ext"), offset{6-1},
- /* u6_0Ext */ offset{5-0})));
- let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
- !if (!eq(ImmOpStr, "u6_2Ext"), 8,
- !if (!eq(ImmOpStr, "u6_1Ext"), 7,
- /* u6_0Ext */ 6)));
- let hasNewValue = !if (!eq(ImmOpStr, "u6_3Ext"), 0, 1);
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isNot;
-
- let IClass = 0b0100;
-
-  let Inst{27} = 0b0;
- let Inst{26} = isNot;
- let Inst{25} = isPredNew;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b0;
- let Inst{12-11} = src1;
- let Inst{10-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-let isExtendable = 1, hasSideEffects = 0, addrMode = BaseImmOffset in
-multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<4>MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- let isPredicable = 1 in
- def L2_#NAME#_io : T_load_io <mnemonic, RC, MajOp, ImmOp>;
-
- // Predicated
- def L2_p#NAME#t_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 0>;
- def L2_p#NAME#f_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 0>;
-
- // Predicated new
- def L2_p#NAME#tnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 1>;
- def L2_p#NAME#fnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 1>;
- }
-}
-
-let accessSize = ByteAccess in {
- defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>;
- defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>;
-}
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>;
- defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>;
-}
-
-let accessSize = WordAccess, opExtentAlign = 2 in
-defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>;
-
-let accessSize = DoubleWordAccess, opExtentAlign = 3 in
-defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>;
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>;
- def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>;
-}
-
-let accessSize = WordAccess, opExtentAlign = 2 in {
- def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>;
- def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>;
-}
-
-let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0,
- opExtendable = 3, isExtentSigned = 1 in
-class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp>
- : LDInst<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- "$dst = "#str#"($src2 + #$offset)", [],
- "$src1 = $dst">, AddrModeRel {
- bits<4> name;
- bits<5> dst;
- bits<5> src2;
- bits<12> offset;
- bits<11> offsetBits;
-
- let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1},
- /* s11_0Ext */ offset{10-0});
- let IClass = 0b1001;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13-5} = offsetBits{8-0};
- let Inst{4-0} = dst;
- }
-
-let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in
-def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>;
-
-let accessSize = ByteAccess, opExtentBits = 11 in
-def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
-
-//===----------------------------------------------------------------------===//
-// Post increment load
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated post increment loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_load_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp >
- : LDInstPI <(outs RC:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, ImmOp:$offset),
- "$dst = "#mnemonic#"($src1++#$offset)" ,
- [],
- "$src1 = $dst2" > ,
- PredNewRel {
- bits<5> dst;
- bits<5> src1;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13-12} = 0b00;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated post increment loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_pload_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isPredNot, bit isPredNew >
- : LDInst <(outs RC:$dst, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
- [] ,
- "$src2 = $dst2" > ,
- PredNewRel {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<7> offset;
- bits<4> offsetBits;
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b1;
- let Inst{12} = isPredNew;
- let Inst{11} = isPredNot;
- let Inst{10-9} = src1;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Multiclass for post increment loads with immediate offset.
-//===----------------------------------------------------------------------===//
-
-multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp, bits<4> MajOp> {
- let BaseOpcode = "POST_"#BaseOp in {
- let isPredicable = 1 in
- def L2_#NAME#_pi : T_load_pi < mnemonic, RC, ImmOp, MajOp>;
-
- // Predicated
- def L2_p#NAME#t_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 0>;
- def L2_p#NAME#f_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 0>;
-
- // Predicated new
- def L2_p#NAME#tnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 1>;
- def L2_p#NAME#fnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 1>;
- }
-}
-
-// post increment byte loads with immediate offset
-let accessSize = ByteAccess in {
- defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>;
- defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>;
-}
-
-// post increment halfword loads with immediate offset
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>;
- defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>;
-}
-
-// post increment word loads with immediate offset
-let accessSize = WordAccess, opExtentAlign = 2 in
-defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>;
-
-// post increment doubleword loads with immediate offset
-let accessSize = DoubleWordAccess, opExtentAlign = 3 in
-defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>;
-
-// Rd=memb[u]h(Rx++#s4:1)
-// Rdd=memb[u]h(Rx++#s4:2)
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>;
- def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>;
-}
-let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in {
- def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>;
- def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for post increment fifo loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp >
- : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2),
- (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- "$dst = "#mnemonic#"($src2++#$offset)" ,
- [], "$src2 = $dst2, $src1 = $dst" > ,
- PredNewRel {
- bits<5> dst;
- bits<5> src2;
- bits<5> offset;
- bits<4> offsetBits;
-
- let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0});
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13-12} = 0b00;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-// Ryy=memh_fifo(Rx++#s4:1)
-// Ryy=memb_fifo(Rx++#s4:0)
-let accessSize = ByteAccess in
-def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>;
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in
-def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>;
-
-//===----------------------------------------------------------------------===//
-// Template class for post increment loads with register offset.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp,
- MemAccessSize AccessSz>
- : LDInstPI <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2),
- "$dst = "#mnemonic#"($src1++$src2)" ,
- [], "$src1 = $_dst_" > {
- bits<5> dst;
- bits<5> src1;
- bits<1> src2;
-
- let accessSize = AccessSz;
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b110;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2;
- let Inst{12} = 0b0;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1 in {
- def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>;
- def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>;
- def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>;
- def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>;
- def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>;
-
- def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>;
-}
-
-def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
-def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>;
-
-// Load predicate.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def LDriw_pred : LDInst<(outs PredRegs:$dst),
- (ins IntRegs:$addr, s11_2Ext:$off),
- ".error \"should not emit\"", []>;
-// Load modifier.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def LDriw_mod : LDInst<(outs ModRegs:$dst),
- (ins IntRegs:$addr, s11_2Ext:$off),
- ".error \"should not emit\"", []>;
-
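-// Note (descriptive, not from the original source): LDriw_pred and
-// LDriw_mod are codegen-only pseudos used when predicate or modifier
-// registers are reloaded from a stack slot; they are expanded before
-// emission, so the ".error" asm string should never be reached.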
-let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in
- def L2_deallocframe : LDInst<(outs), (ins),
- "deallocframe",
- []> {
- let IClass = 0b1001;
-
- let Inst{27-16} = 0b000000011110;
- let Inst{13} = 0b0;
- let Inst{4-0} = 0b11110;
-}
-
-// Load / Post increment circular addressing mode.
-let Uses = [CS], hasSideEffects = 0, addrMode = PostInc in
-class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
- : LDInst <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
- "$Rz = $_dst_" > {
- bits<5> dst;
- bits<5> Rz;
- bit Mu;
-
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{9} = 0b1;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
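-// Background note (assumed from the Hexagon reference manual, not stated
-// here): in the ":circ($Mu)" forms the modifier register supplies the
-// increment and the circular buffer length, and the CS register implied
-// by "Uses = [CS]" holds the buffer start, so the pointer wraps within
-// the buffer after each post-incremented access.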
-let accessSize = ByteAccess in {
- def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>;
- def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>;
-}
-
-let accessSize = HalfWordAccess in {
- def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>;
- def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>;
- def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>;
- def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>;
-}
-
-let accessSize = WordAccess in {
- def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>;
- let hasNewValue = 0 in {
- def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>;
- def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>;
- }
-}
-
-let accessSize = DoubleWordAccess in
-def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>;
-
-// Load / Post increment circular addressing mode.
-let Uses = [CS], hasSideEffects = 0, addrMode = PostInc in
-class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz >
- : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_),
- (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
- "$Rz = $_dst_, $dst = $_src_" > {
- bits<5> dst;
- bits<5> Rz;
- bit Mu;
-
- let accessSize = AccessSz;
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{9} = 0b1;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
-def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>;
-def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Circular loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let Uses = [CS], mayLoad = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_load_pci <string mnemonic, RegisterClass RC,
- Operand ImmOp, bits<4> MajOp>
- : LDInstPI<(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$Rz, ImmOp:$offset, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ #$offset:circ($Mu))", [],
- "$Rz = $_dst_"> {
- bits<5> dst;
- bits<5> Rz;
- bits<1> Mu;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- let IClass = 0b1001;
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{9} = 0b0;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-// Byte variants of circ load
-let accessSize = ByteAccess in {
- def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>;
- def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>;
-}
-
-// Half word variants of circ load
-let accessSize = HalfWordAccess in {
- def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>;
- def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>;
- def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>;
- def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>;
-}
-
-// Word variants of circ load
-let accessSize = WordAccess in
-def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>;
-
-let accessSize = WordAccess, hasNewValue = 0 in {
- def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
- def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>;
-}
-
-let accessSize = DoubleWordAccess, hasNewValue = 0 in
-def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;
-
-
-// TODO: memb_fifo and memh_fifo must take the destination register as an input.
-// One-off circ loads - not enough in common to break into a class.
-let accessSize = ByteAccess in
-def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>;
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in
-def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>;
-
-// L[24]_load[wd]_locked: Load word/double with lock.
-let isSoloAX = 1 in
-class T_load_locked <string mnemonic, RegisterClass RC>
- : LD0Inst <(outs RC:$dst),
- (ins IntRegs:$src),
- "$dst = "#mnemonic#"($src)"> {
- bits<5> dst;
- bits<5> src;
- let IClass = 0b1001;
- let Inst{27-21} = 0b0010000;
- let Inst{20-16} = src;
- let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00);
- let Inst{5} = 0;
- let Inst{4-0} = dst;
-}
-let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in
- def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>;
-let accessSize = DoubleWordAccess in
- def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>;
-
-// S[24]_store[wd]_locked: Store word/double conditionally.
-let isSoloAX = 1, isPredicateLate = 1 in
-class T_store_locked <string mnemonic, RegisterClass RC>
- : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt),
- mnemonic#"($Rs, $Pd) = $Rt"> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1010;
- let Inst{27-23} = 0b00001;
- let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1);
- let Inst{21} = 0b1;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{1-0} = Pd;
-}
-
-let accessSize = WordAccess in
-def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>;
-
-let accessSize = DoubleWordAccess in
-def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>;
-
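-// Note (descriptive): the locked load/store pairs above form a
-// load-locked / store-conditional protocol; the conditional store sets
-// $Pd to report success, and atomic read-modify-write sequences built
-// elsewhere in the backend retry until it succeeds.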
-//===----------------------------------------------------------------------===//
-// Bit-reversed loads with auto-increment register
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_load_pbr<string mnemonic, RegisterClass RC,
- MemAccessSize addrSize, bits<4> majOp>
- : LDInst
- <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ $Mu:brev)" ,
- [] , "$Rz = $_dst_" > {
-
- let accessSize = addrSize;
-
- bits<5> dst;
- bits<5> Rz;
- bits<1> Mu;
-
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b111;
- let Inst{24-21} = majOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue =1, opNewValue = 0 in {
- def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>;
- def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>;
- def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>;
- def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>;
- def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>;
- def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>;
- def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>;
-}
-
-def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>;
-def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>;
-def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>;
-
-def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>;
-def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs,
- HalfWordAccess, 0b0010>;
-
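-// Background note (assumed, not stated here): the "$Mu:brev" addressing
-// forms the effective address by bit-reversing part of the base pointer,
-// the access pattern used for FFT-style data reordering.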
-//===----------------------------------------------------------------------===//
-// LD -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/ALU +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/COMPLEX +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/COMPLEX -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYH +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Template Class
-// MPYS / Multiply signed/unsigned halfwords
-//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd,
- bit hasShift, bit isUnsigned>
- : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1","")
- #!if(isRnd,":rnd","")
- #!if(isSat,":sat",""),
- [], "", M_tc_3x_SLOT23 > {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1100;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isRnd;
- let Inst{7} = isSat;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
-
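-// For example, M2_mpy_rnd_hl_s1 below prints as
-//   "$Rd = mpy($Rs.h, $Rt.l):<<1:rnd"
-// (LHbits=0b10 picks .h/.l, hasShift appends :<<1, isRnd appends :rnd).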
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>;
-def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>;
-def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>;
-def M2_mpy_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 0>;
-def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>;
-def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>;
-def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>;
-def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>;
-
-//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>;
-def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>;
-def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>;
-def M2_mpyu_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 1>;
-def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>;
-def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>;
-def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>;
-def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>;
-
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd
-def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>;
-def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>;
-def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>;
-def M2_mpy_rnd_lh_s0: T_M2_mpy <0b01, 0, 1, 0, 0>;
-def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>;
-def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>;
-def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>;
-def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>;
-
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-let Defs = [USR_OVF] in {
- def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>;
- def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>;
- def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>;
- def M2_mpy_sat_lh_s0: T_M2_mpy <0b01, 1, 0, 0, 0>;
- def M2_mpy_sat_hl_s1: T_M2_mpy <0b10, 1, 0, 1, 0>;
- def M2_mpy_sat_hl_s0: T_M2_mpy <0b10, 1, 0, 0, 0>;
- def M2_mpy_sat_hh_s1: T_M2_mpy <0b11, 1, 0, 1, 0>;
- def M2_mpy_sat_hh_s0: T_M2_mpy <0b11, 1, 0, 0, 0>;
-
- def M2_mpy_sat_rnd_ll_s1: T_M2_mpy <0b00, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_ll_s0: T_M2_mpy <0b00, 1, 1, 0, 0>;
- def M2_mpy_sat_rnd_lh_s1: T_M2_mpy <0b01, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_lh_s0: T_M2_mpy <0b01, 1, 1, 0, 0>;
- def M2_mpy_sat_rnd_hl_s1: T_M2_mpy <0b10, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_hl_s0: T_M2_mpy <0b10, 1, 1, 0, 0>;
- def M2_mpy_sat_rnd_hh_s1: T_M2_mpy <0b11, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_hh_s0: T_M2_mpy <0b11, 1, 1, 0, 0>;
-}
-
-//===----------------------------------------------------------------------===//
-// Template Class
-// MPYS / Multiply signed/unsigned halfwords and add the result to, or
-// subtract it from, the accumulator.
-//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac,
- bit hasShift, bit isUnsigned >
- : MInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
- #"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1","")
- #!if(isSat,":sat",""),
- [], "$dst2 = $Rx", M_tc_3x_SLOT23 > {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
- let Inst{27-24} = 0b1110;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isNac;
- let Inst{7} = isSat;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
-
-//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>;
-def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>;
-def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>;
-def M2_mpy_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 0>;
-def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>;
-def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>;
-def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>;
-def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>;
-
-//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>;
-def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>;
-def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>;
-def M2_mpyu_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 1>;
-def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>;
-def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>;
-def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>;
-def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>;
-
-//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>;
-def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>;
-def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>;
-def M2_mpy_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 0>;
-def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>;
-def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>;
-def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>;
-def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>;
-
-//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>;
-def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>;
-def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>;
-def M2_mpyu_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 1>;
-def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>;
-def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>;
-def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>;
-def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>;
-
-//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>;
-def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 0, 0, 0>;
-def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>;
-def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>;
-
-//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>;
-def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 1, 0, 0>;
-def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>;
-def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template Class
-// MPYS / Multiply signed/unsigned halfwords and add the result to, or
-// subtract it from, the 64-bit destination register.
-//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned>
- : MInst_acc<(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rxx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
- #"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1",""),
- [], "$dst2 = $Rxx", M_tc_3x_SLOT23 > {
- bits<5> Rxx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0110;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isNac;
- let Inst{7} = 0;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
-
-def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>;
-def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>;
-def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>;
-def M2_mpyd_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 0>;
-
-def M2_mpyd_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 0>;
-def M2_mpyd_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 0>;
-def M2_mpyd_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 0>;
-def M2_mpyd_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 0>;
-
-def M2_mpyd_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 0>;
-def M2_mpyd_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 0>;
-def M2_mpyd_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 0>;
-def M2_mpyd_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 0>;
-
-def M2_mpyd_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 0>;
-def M2_mpyd_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 0>;
-def M2_mpyd_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 0>;
-def M2_mpyd_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 0>;
-
-def M2_mpyud_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 1>;
-def M2_mpyud_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 1>;
-def M2_mpyud_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 1>;
-def M2_mpyud_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 1>;
-
-def M2_mpyud_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 1>;
-def M2_mpyud_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 1>;
-def M2_mpyud_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 1>;
-def M2_mpyud_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 1>;
-
-def M2_mpyud_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 1>;
-def M2_mpyud_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 1>;
-def M2_mpyud_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 1>;
-def M2_mpyud_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 1>;
-
-def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>;
-def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>;
-def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>;
-def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- Vector Multiply
-// Used for complex multiply (real or imaginary), dual multiply, and
-// multiply even halfwords.
-//===----------------------------------------------------------------------===//
-class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift,
- bit isRnd, bit isSat >
- : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
- #!if(isRnd,":rnd","")
- #!if(isSat,":sat",""),
- [] > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
-let Defs = [USR_OVF] in {
-def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>;
-def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>;
-
-// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
-def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>;
-def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>;
-
-// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
-def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>;
-def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>;
-
-// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
-def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>;
-def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>;
-
-//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>;
-def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>;
-def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>;
-def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>;
-
-//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>;
-def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>;
-def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>;
-def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>;
-
-//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>;
-def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>;
-def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>;
-def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>;
-
-//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>;
-def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>;
-def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>;
-def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>;
-}
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
- bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0,
- string op2Suffix = "", bit isRaw = 0, bit isHi = 0 >
- : MInst <(outs IntRegs:$dst), (ins RC:$src1, RC:$src2),
- "$dst = "#mnemonic
- #"($src1, $src2"#op2Suffix#")"
- #!if(MajOp{2}, ":<<1", "")
- #!if(isRnd, ":rnd", "")
- #!if(isSat, ":sat", "")
- #!if(isRaw, !if(isHi, ":raw:hi", ":raw:lo"), ""), [] > {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = 0b0;
- let Inst{12-8} = src2;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi>
- : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>;
-
-class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0 >
- : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>;
-
-class T_MType_rr1 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0 >
- : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd>;
-
-class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0, string op2str = "" >
- : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;
-
-def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>;
-def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>;
-def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>;
-
-let CextOpcode = "mpyi", InputType = "reg" in
-def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;
-
-def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>;
-def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
-
-def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;
-
-def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>;
-def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>;
-
-def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
-def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">;
-
-def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>;
-def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>;
-def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">;
-def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">;
-
-// V4 Instructions
-def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>;
-def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>;
-def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>;
-def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>;
-
-def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
-def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
- : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8),
- "$Rd ="#!if(isNeg, "- ", "+ ")#"mpyi($Rs, #$u8)" ,
- pattern, "", M_tc_3x_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<8> u8;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0000;
- let Inst{23} = isNeg;
- let Inst{13} = 0b0;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
- let Inst{12-5} = u8;
- }
-
-let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
-def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext, []>;
-
-def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm, []>;
-
-// Assembler mapped to M2_mpyi
-let isAsmParserOnly = 1 in
-def M2_mpyui : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpyui($src1, $src2)">;
-
-// Rd=mpyi(Rs,#m9)
-// s9 is NOT the same as m9, but it works so far.
-// The assembler maps this to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8),
-// depending on the value of m9. See the Arch Spec.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
- CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1,
- isAsmParserOnly = 1 in
-def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9_0Ext:$src2),
- "$dst = mpyi($src1, #$src2)", []>, ImmRegRel;
-
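-// For example (illustrative, per the comment above): a negative multiplier
-// such as mpyi(r1, #-6) maps to the -mpyi form (M2_mpysin) with #6, while a
-// non-negative multiplier maps to the +mpyi form (M2_mpysip).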
-let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
- InputType = "imm" in
-class T_MType_acc_ri <string mnemonic, bits<3> MajOp, Operand ImmOp,
- list<dag> pattern = []>
- : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, ImmOp:$src3),
- "$dst "#mnemonic#"($src2, #$src3)",
- pattern, "$src1 = $dst", M_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src2;
- bits<8> src3;
-
- let IClass = 0b1110;
-
- let Inst{27-26} = 0b00;
- let Inst{25-23} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b0;
- let Inst{12-5} = src3;
- let Inst{4-0} = dst;
- }
-
-let InputType = "reg", hasNewValue = 1 in
-class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSwap = 0, list<dag> pattern = [], bit hasNot = 0,
- bit isSat = 0, bit isShift = 0>
- : MInst < (outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst "#mnemonic#"($src2, "#!if(hasNot, "~$src3)","$src3)")
- #!if(isShift, ":<<1", "")
- #!if(isSat, ":sat", ""),
- pattern, "$src1 = $dst", M_tc_2_SLOT23 > {
- bits<5> dst;
- bits<5> src2;
- bits<5> src3;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = !if(isSwap, src3, src2);
- let Inst{13} = 0b0;
- let Inst{12-8} = !if(isSwap, src2, src3);
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
- def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext, []>, ImmRegRel;
-
- def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, []>, ImmRegRel;
-}
-
-let CextOpcode = "ADD_acc" in {
- let isExtentSigned = 1 in
- def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext, []>, ImmRegRel;
-
- def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, []>, ImmRegRel;
-}
-
-let CextOpcode = "SUB_acc" in {
- let isExtentSigned = 1 in
- def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8_0Ext>, ImmRegRel;
-
- def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel;
-}
-
-let Itinerary = M_tc_3x_SLOT23 in
-def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8_0Ext>;
-
-def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
-def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- XType Vector Instructions
-//===----------------------------------------------------------------------===//
-class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
- : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
- [] > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
- : MInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
- [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-class T_XTYPE_Vect_diff < bits<3> MajOp, string opc >
- : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss),
- "$Rdd = "#opc#"($Rtt, $Rss)",
- [], "",M_tc_2_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector reduce add unsigned bytes: Rdd32=vraddub(Rss32,Rtt32)
-def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>;
-def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>;
-
-// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
-def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>;
-def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>;
-
-// Vector absolute difference halfwords: Rdd=vabsdiffh(Rtt,Rss)
-def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">;
-
-// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
-def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">;
-
-// Vector reduce complex multiply real or imaginary:
-// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
-def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>;
-def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>;
-def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>;
-def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>;
-
-def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>;
-def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>;
-def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>;
-def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>;
-
-// Vector reduce halfwords:
-// Rdd[+]=vrmpyh(Rss,Rtt)
-def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>;
-def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- Vector Multiply with accumulation.
-// Used for complex multiply real or imaginary, dual multiply and even halfwords
-//===----------------------------------------------------------------------===//
-let Defs = [USR_OVF] in
-class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp,
- bit hasShift, bit isRnd >
- : MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
- #!if(isRnd,":rnd","")#":sat",
- [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp,
- bit hasShift, bit isRnd >
- : MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
- #!if(isRnd,":rnd",""),
- [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector multiply word by signed half with accumulation
-// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>;
-def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>;
-def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>;
-def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>;
-
-def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>;
-def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>;
-def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>;
-def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>;
-
-// Vector multiply word by unsigned half with accumulation
-// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>;
-def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>;
-def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>;
-def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>;
-
-def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>;
-def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>;
-def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>;
-def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>;
-
-// Vector multiply even halfwords with accumulation
-// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
-def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>;
-def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>;
-def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>;
-
-// Vector dual multiply with accumulation
-// Rxx+=vdmpy(Rss,Rtt)[:sat]
-def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>;
-def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>;
-
-// Vector complex multiply real or imaginary with accumulation
-// Rxx+=vcmpy[ir](Rss,Rtt):sat
-def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>;
-def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- Multiply signed/unsigned halfwords into a 64-bit result,
-// with and without rounding
-//===----------------------------------------------------------------------===//
-class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned >
- : MInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rdd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1","")
- #!if(isRnd,":rnd",""),
- [] > {
- bits<5> Rdd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0100;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isRnd;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
-}
-
-def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>;
-def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>;
-def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>;
-def M2_mpyd_ll_s0: T_M2_mpyd<0b00, 0, 0, 0>;
-
-def M2_mpyd_hh_s1: T_M2_mpyd<0b11, 0, 1, 0>;
-def M2_mpyd_hl_s1: T_M2_mpyd<0b10, 0, 1, 0>;
-def M2_mpyd_lh_s1: T_M2_mpyd<0b01, 0, 1, 0>;
-def M2_mpyd_ll_s1: T_M2_mpyd<0b00, 0, 1, 0>;
-
-def M2_mpyd_rnd_hh_s0: T_M2_mpyd<0b11, 1, 0, 0>;
-def M2_mpyd_rnd_hl_s0: T_M2_mpyd<0b10, 1, 0, 0>;
-def M2_mpyd_rnd_lh_s0: T_M2_mpyd<0b01, 1, 0, 0>;
-def M2_mpyd_rnd_ll_s0: T_M2_mpyd<0b00, 1, 0, 0>;
-
-def M2_mpyd_rnd_hh_s1: T_M2_mpyd<0b11, 1, 1, 0>;
-def M2_mpyd_rnd_hl_s1: T_M2_mpyd<0b10, 1, 1, 0>;
-def M2_mpyd_rnd_lh_s1: T_M2_mpyd<0b01, 1, 1, 0>;
-def M2_mpyd_rnd_ll_s1: T_M2_mpyd<0b00, 1, 1, 0>;
-
-// Rdd=mpyu(Rs.[HL],Rt.[HL])[:<<1]
-def M2_mpyud_hh_s0: T_M2_mpyd<0b11, 0, 0, 1>;
-def M2_mpyud_hl_s0: T_M2_mpyd<0b10, 0, 0, 1>;
-def M2_mpyud_lh_s0: T_M2_mpyd<0b01, 0, 0, 1>;
-def M2_mpyud_ll_s0: T_M2_mpyd<0b00, 0, 0, 1>;
-
-def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>;
-def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>;
-def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>;
-def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>;
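-
-// For reference, the syntax produced by a few of the defs above, derived from
-// the template string in T_M2_mpyd:
-//   M2_mpyd_hl_s1     -> Rdd = mpy(Rs.h, Rt.l):<<1
-//   M2_mpyd_rnd_ll_s0 -> Rdd = mpy(Rs.l, Rt.l):rnd
-//   M2_mpyud_hh_s0    -> Rdd = mpyu(Rs.h, Rt.h)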
-
-//===----------------------------------------------------------------------===//
-// Template Class for xtype mpy:
-// Vector multiply
-// Complex multiply
-// multiply 32X32 and use full result
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_XTYPE_mpy64 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat, bit hasShift, bit isConj>
- : MInst <(outs DoubleRegs:$Rdd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rdd = "#mnemonic#"($Rs, $Rt"#!if(isConj,"*)",")")
- #!if(hasShift,":<<1","")
- #!if(isSat,":sat",""),
- [] > {
- bits<5> Rdd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- }
-
-//===----------------------------------------------------------------------===//
-// Template Class for xtype mpy with accumulation into 64-bit:
-// Vector multiply
-// Complex multiply
-// multiply 32X32 and use full result
-//===----------------------------------------------------------------------===//
-class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp,
- bit isSat, bit hasShift, bit isConj>
- : MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rxx "#op2#"= "#op1#"($Rs, $Rt"#!if(isConj,"*)",")")
- #!if(hasShift,":<<1","")
- #!if(isSat,":sat",""),
-
- [] , "$dst2 = $Rxx" > {
- bits<5> Rxx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0111;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rxx;
- }
-
-// MPY - Multiply and use full result
-// Rdd = mpy[u](Rs,Rt)
-def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>;
-def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>;
-
-// Rxx[+-]= mpy[u](Rs,Rt)
-def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>;
-def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>;
-def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>;
-def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>;
-
-// Complex multiply real or imaginary
-// Rxx=cmpy[ir](Rs,Rt)
-def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>;
-def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>;
-
-// Rxx+=cmpy[ir](Rs,Rt)
-def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>;
-def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>;
-
-// Complex multiply
-// Rdd=cmpy(Rs,Rt)[:<<1]:sat
-def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>;
-def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>;
-
-// Rdd=cmpy(Rs,Rt*)[:<<1]:sat
-def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>;
-def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>;
-
-// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
-def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>;
-def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>;
-def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>;
-def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>;
-
-// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
-def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>;
-def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>;
-def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>;
-def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>;
-
-// Vector multiply halfwords
-// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
-//let Defs = [USR_OVF] in {
- def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>;
- def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>;
-//}
-
-// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
-def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>;
-def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
-def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYH -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYS +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYS -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/VB +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/VB -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/VH +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/VH -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ST +
-//===----------------------------------------------------------------------===//
-// Store instructions.
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated post increment stores with immediate offset
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isHalf >
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
- mnemonic#"($src1++#$offset) = $src2"#!if(isHalf, ".h", ""),
- [], "$src1 = $_dst_" >,
- AddrModeRel {
- bits<5> src1;
- bits<5> src2;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = 0b0;
- let Inst{12-8} = src2;
- let Inst{7} = 0b0;
- let Inst{6-3} = offsetBits;
- let Inst{1} = 0b0;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated post increment stores with immediate offset
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew>
- : STInst <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2++#$offset) = $src3"#!if(isHalf, ".h", ""),
- [], "$src2 = $_dst_" >,
- AddrModeRel {
- bits<2> src1;
- bits<5> src2;
- bits<7> offset;
- bits<5> src3;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b1010;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b1;
- let Inst{12-8} = src3;
- let Inst{7} = isPredNew;
- let Inst{6-3} = offsetBits;
- let Inst{2} = isPredNot;
- let Inst{1-0} = src1;
- }
-
-multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp, bits<4> MajOp, bit isHalf = 0 > {
-
- let BaseOpcode = "POST_"#BaseOp in {
- def S2_#NAME#_pi : T_store_pi <mnemonic, RC, ImmOp, MajOp, isHalf>;
-
- // Predicated
- def S2_p#NAME#t_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 0, 0>;
- def S2_p#NAME#f_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 1, 0>;
-
- // Predicated new
- def S2_p#NAME#tnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
- isHalf, 0, 1>;
- def S2_p#NAME#fnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
- isHalf, 1, 1>;
- }
-}
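-
-// As an example of the naming scheme, "defm storerb" below expands to
-// S2_storerb_pi plus the predicated forms S2_pstorerbt_pi, S2_pstorerbf_pi,
-// S2_pstorerbtnew_pi and S2_pstorerbfnew_pi.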
-
-let accessSize = ByteAccess in
-defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>;
-
-let accessSize = HalfWordAccess in
-defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>;
-
-let accessSize = WordAccess in
-defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>;
-
-let accessSize = DoubleWordAccess in
-defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>;
-
-let accessSize = HalfWordAccess, isNVStorable = 0 in
-defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
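-
-// A sketch of the post-increment syntax these defs accept; the #offset is
-// scaled by the access size when encoded (see offsetBits above). Register
-// names are placeholders:
-//   memb(r0++#3) = r1       // S2_storerb_pi, offsetBits = 3
-//   memd(r0++#16) = r3:2    // S2_storerd_pi, offsetBits = 16 >> 3 = 2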
-
-//===----------------------------------------------------------------------===//
-// Template class for post increment stores with register offset.
-//===----------------------------------------------------------------------===//
-class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
- MemAccessSize AccessSz, bit isHalf = 0>
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, RC:$src3),
- mnemonic#"($src1++$src2) = $src3"#!if(isHalf, ".h", ""),
- [], "$src1 = $_dst_" > {
- bits<5> src1;
- bits<1> src2;
- bits<5> src3;
- let accessSize = AccessSz;
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2;
- let Inst{12-8} = src3;
- let Inst{7} = 0b0;
- }
-
-def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
-def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
-def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
-def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
-def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
-
-let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
-class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp, bit isH = 0>
- : STInst <(outs),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>,
- AddrModeRel, ImmRegRel {
- bits<5> src1;
- bits<14> src2; // Actual address offset
- bits<5> src3;
- bits<11> offsetBits; // Represents offset encoding
-
- string ImmOpStr = !cast<string>(ImmOp);
-
- let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
- !if (!eq(ImmOpStr, "s11_2Ext"), 13,
- !if (!eq(ImmOpStr, "s11_1Ext"), 12,
- /* s11_0Ext */ 11)));
- let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), src2{13-3},
- !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2},
- !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1},
- /* s11_0Ext */ src2{10-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
- let IClass = 0b1010;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24} = 0b1;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = offsetBits{8};
- let Inst{12-8} = src3;
- let Inst{7-0} = offsetBits{7-0};
- }
-
-let opExtendable = 2, isPredicated = 1 in
-class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3>MajOp, bit PredNot, bit isPredNew, bit isH = 0>
- : STInst <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+#$src3) = $src4"#!if(isH,".h",""),
- [],"",V2LDST_tc_st_SLOT01 >,
- AddrModeRel, ImmRegRel {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3; // Actual address offset
- bits<5> src4;
- bits<6> offsetBits; // Represents offset encoding
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = PredNot;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
- !if (!eq(ImmOpStr, "u6_2Ext"), 8,
- !if (!eq(ImmOpStr, "u6_1Ext"), 7,
- /* u6_0Ext */ 6)));
- let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), src3{8-3},
- !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2},
- !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1},
- /* u6_0Ext */ src3{5-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
-
- let IClass = 0b0100;
-
- let Inst{27} = 0b0;
- let Inst{26} = PredNot;
- let Inst{25} = isPredNew;
- let Inst{24} = 0b0;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = offsetBits{5};
- let Inst{12-8} = src4;
- let Inst{7-3} = offsetBits{4-0};
- let Inst{1-0} = src1;
- }
-
-let isExtendable = 1, hasSideEffects = 0 in
-multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- def S2_#NAME#_io : T_store_io <mnemonic, RC, ImmOp, MajOp, isH>;
-
- // Predicated
- def S2_p#NAME#t_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 0, 0, isH>;
- def S2_p#NAME#f_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 1, 0, isH>;
-
- // Predicated new
- def S4_p#NAME#tnew_io : T_pstore_io <mnemonic, RC, predImmOp,
- MajOp, 0, 1, isH>;
- def S4_p#NAME#fnew_io : T_pstore_io <mnemonic, RC, predImmOp,
- MajOp, 1, 1, isH>;
- }
-}
-
-let addrMode = BaseImmOffset, InputType = "imm" in {
- let accessSize = ByteAccess in
- defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>;
-
- let accessSize = HalfWordAccess, opExtentAlign = 1 in
- defm storerh: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, u6_1Ext, 0b010>;
-
- let accessSize = WordAccess, opExtentAlign = 2 in
- defm storeri: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, u6_2Ext, 0b100>;
-
- let accessSize = DoubleWordAccess, isNVStorable = 0, opExtentAlign = 3 in
- defm storerd: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
- u6_3Ext, 0b110>;
-
- let accessSize = HalfWordAccess, opExtentAlign = 1 in
- defm storerf: ST_Idxd < "memh", "STrif", IntRegs, s11_1Ext,
- u6_1Ext, 0b011, 1>;
-}
-
-// Store predicate.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def STriw_pred : STInst<(outs),
- (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1),
- ".error \"should not emit\"", []>;
-// Store modifier.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def STriw_mod : STInst<(outs),
- (ins IntRegs:$addr, s11_2Ext:$off, ModRegs:$src1),
- ".error \"should not emit\"", []>;
-
-// S2_allocframe: Allocate stack frame.
-let Defs = [R29, R30], Uses = [R29, R31, R30],
- hasSideEffects = 0, accessSize = DoubleWordAccess in
-def S2_allocframe: ST0Inst <
- (outs), (ins u11_3Imm:$u11_3),
- "allocframe(#$u11_3)" > {
- bits<14> u11_3;
-
- let IClass = 0b1010;
- let Inst{27-16} = 0b000010011101;
- let Inst{13-11} = 0b000;
- let Inst{10-0} = u11_3{13-3};
- }
-
-// S2_storer[bhwdf]_pci: Store byte/half/word/double.
-// New-value counterparts (S2_storer[bhi]new_pci) are defined further below.
-let Uses = [CS], addrMode = PostInc in
-class T_store_pci <string mnemonic, RegisterClass RC,
- Operand Imm, bits<4>MajOp,
- MemAccessSize AlignSize, string RegSrc = "Rt">
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, RC:$Rt),
- #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $"#RegSrc#"",
- [] ,
- "$Rz = $_dst_" > {
- bits<5> Rz;
- bits<7> offset;
- bits<1> Mu;
- bits<5> Rt;
- let accessSize = AlignSize;
- let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
- !if(!eq(RegSrc,"Rt.h"), 0, 1));
-
- let IClass = 0b1010;
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b0;
- let Inst{6-3} =
- !if (!eq(!cast<string>(AlignSize), "DoubleWordAccess"), offset{6-3},
- !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
- !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
- /* ByteAccess */ offset{3-0})));
- let Inst{1} = 0b0;
- }
-
-def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
- ByteAccess>;
-def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
- HalfWordAccess>;
-def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011,
- HalfWordAccess, "Rt.h">;
-def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
- WordAccess>;
-def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
- DoubleWordAccess>;
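-
-// A sketch of the circular-store syntax produced by the defs above, assuming
-// the modifier register (m0/m1) and CS have been set up; registers are
-// placeholders:
-//   memw(r0 ++ #8:circ(m0)) = r1      // S2_storeri_pci
-//   memh(r2 ++ #2:circ(m1)) = r3.h    // S2_storerf_pci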
-
-let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4,
- addrMode = PostInc in
-class T_storenew_pci <string mnemonic, Operand Imm,
- bits<2>MajOp, MemAccessSize AlignSize>
- : NVInst < (outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt),
- #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new",
- [],
- "$Rz = $_dst_"> {
- bits<5> Rz;
- bits<6> offset;
- bits<1> Mu;
- bits<3> Nt;
-
- let accessSize = AlignSize;
-
- let IClass = 0b1010;
- let Inst{27-21} = 0b1001101;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = Nt;
- let Inst{7} = 0b0;
- let Inst{6-3} =
- !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
- !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
- /* ByteAccess */ offset{3-0}));
- let Inst{1} = 0b0;
- }
-
-def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>;
-def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>;
-def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Circular stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let Uses = [CS], addrMode = PostInc in
-class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
- MemAccessSize AlignSize, string RegSrc = "Rt">
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, RC:$Rt),
- #mnemonic#"($Rz ++ I:circ($Mu)) = $"#RegSrc#"",
- [],
- "$Rz = $_dst_" > {
- bits<5> Rz;
- bits<1> Mu;
- bits<5> Rt;
-
- let accessSize = AlignSize;
- let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
- !if(!eq(RegSrc,"Rt.h"), 0, 1));
-
- let IClass = 0b1010;
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b0;
- let Inst{1} = 0b1;
- }
-
-def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>;
-def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>;
-def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>;
-def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
-def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011,
- HalfWordAccess, "Rt.h">;
-
-//===----------------------------------------------------------------------===//
-// Circular .new stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
- addrMode = PostInc in
-class T_storenew_pcr <string mnemonic, bits<2>MajOp,
- MemAccessSize AlignSize>
- : NVInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
- #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" ,
- [] ,
- "$Rz = $_dst_"> {
- bits<5> Rz;
- bits<1> Mu;
- bits<3> Nt;
-
- let accessSize = AlignSize;
-
- let IClass = 0b1010;
- let Inst{27-21} = 0b1001101;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = Nt;
- let Inst{7} = 0b0;
- let Inst{1} = 0b1;
- }
-
-def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>;
-def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>;
-def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Bit-reversed stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_store_pbr<string mnemonic, RegisterClass RC,
- MemAccessSize addrSize, bits<3> majOp,
- bit isHalf = 0>
- : STInst
- <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, RC:$src),
- #mnemonic#"($Rz ++ $Mu:brev) = $src"#!if (!eq(isHalf, 1), ".h", ""),
- [], "$Rz = $_dst_" > {
-
- let accessSize = addrSize;
-
- bits<5> Rz;
- bits<1> Mu;
- bits<5> src;
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = majOp;
- let Inst{7} = 0b0;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-8} = src;
- }
-
-let isNVStorable = 1 in {
- let BaseOpcode = "S2_storerb_pbr" in
- def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess,
- 0b000>, NewValueRel;
- let BaseOpcode = "S2_storerh_pbr" in
- def S2_storerh_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess,
- 0b010>, NewValueRel;
- let BaseOpcode = "S2_storeri_pbr" in
- def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess,
- 0b100>, NewValueRel;
-}
-
-def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>;
-def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>;
-
-//===----------------------------------------------------------------------===//
-// Bit-reversed .new stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
- hasSideEffects = 0, addrMode = PostInc in
-class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp>
- : NVInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
- #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [],
- "$Rz = $_dst_">, NewValueRel {
- let accessSize = addrSize;
- bits<5> Rz;
- bits<1> Mu;
- bits<3> Nt;
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1111101;
- let Inst{12-11} = majOp;
- let Inst{7} = 0b0;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{10-8} = Nt;
- }
-
-let BaseOpcode = "S2_storerb_pbr" in
-def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>;
-
-let BaseOpcode = "S2_storerh_pbr" in
-def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>;
-
-let BaseOpcode = "S2_storeri_pbr" in
-def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>;
-
-//===----------------------------------------------------------------------===//
-// ST -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Template class for S_2op instructions.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
- RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat>
- : SInst <(outs RCOut:$dst), (ins RCIn:$src),
- "$dst = "#mnemonic#"($src)"#!if(isSat, ":sat", ""),
- [], "", S_2op_tc_1_SLOT23 > {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23-22} = MajOp;
- let Inst{21} = 0b0;
- let Inst{20-16} = src;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_S2op_1_di <string mnemonic, bits<2> MajOp, bits<3> MinOp>
- : T_S2op_1 <mnemonic, 0b0100, DoubleRegs, IntRegs, MajOp, MinOp, 0>;
-
-let hasNewValue = 1 in
-class T_S2op_1_id <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
- : T_S2op_1 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, isSat>;
-
-let hasNewValue = 1 in
-class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
- : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>;
-
-// Vector sign/zero extend
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>;
- def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>;
- def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>;
- def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>;
-}
-
-// Vector splat bytes/halfwords
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>;
- def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>;
-}
-
-// Sign extend word to doubleword
-def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
-
-// Vector saturate and pack
-let Defs = [USR_OVF] in {
- def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>;
- def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>;
- def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>;
- def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>;
- def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>;
- def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>;
-}
-
-// Vector truncate
-def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>;
-def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>;
-
-// Swizzle the bytes of a word
-def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>;
-
-// Saturate
-let Defs = [USR_OVF] in {
- def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>;
- def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>;
- def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>;
- def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>;
- def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>;
- def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>;
-}
-
-let Itinerary = S_2op_tc_2_SLOT23 in {
- // Vector round and pack
- def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>;
-
- let Defs = [USR_OVF] in
- def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>;
-
- // Bit reverse
- def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>;
-
- // Absolute value word
- def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>;
-
- let Defs = [USR_OVF] in
- def A2_abssat : T_S2op_1_ii <"abs", 0b10, 0b101, 1>;
-
- // Negate with saturation
- let Defs = [USR_OVF] in
- def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>;
-}
-
-class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
- RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp,
- bit isSat, bit isRnd, list<dag> pattern = []>
- : SInst <(outs RCOut:$dst),
- (ins RCIn:$src, u5_0Imm:$u5),
- "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "")
- #!if(isRnd, ":rnd", ""),
- pattern, "", S_2op_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src;
- bits<5> u5;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src;
- let Inst{13} = 0b0;
- let Inst{12-8} = u5;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp>
- : T_S2op_2 <mnemonic, 0b1000, DoubleRegs, IntRegs, MajOp, MinOp, 0, 0>;
-
-let hasNewValue = 1 in
-class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp>
- : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>;
-
-let hasNewValue = 1 in
-class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0, list<dag> pattern = []>
- : T_S2op_2 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp,
- isSat, isRnd, pattern>;
-
-class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
- : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, []>;
-
-// Vector arithmetic shift right by immediate with truncate and pack
-def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>;
-
-// Arithmetic/logical shift right/left by immediate
-let Itinerary = S_2op_tc_1_SLOT23 in {
- def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>;
- def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>;
- def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>;
-}
-
-// Shift left by immediate with saturation
-let Defs = [USR_OVF] in
-def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>;
-
-// Shift right with round
-def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;
-
-let isAsmParserOnly = 1 in
-def S2_asr_i_r_rnd_goodsyntax
- : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5_0Imm:$u5),
- "$dst = asrrnd($src, #$u5)",
- [], "", S_2op_tc_1_SLOT23>;
-
-let isAsmParserOnly = 1 in
-def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
- "$dst = not($src)">;
-
-class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
- : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
- "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> {
- bits<5> Rss;
- bits<5> Rdd;
- let IClass = 0b1000;
- let Inst{27-24} = 0;
- let Inst{23-22} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{7-5} = minOp;
- let Inst{4-0} = Rdd;
-}
-
-def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>;
-def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>;
-def A2_notp : T_S2op_3 <"not", 0b10, 0b100>;
-
-// Interleave/deinterleave
-def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>;
-def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>;
-
-// Vector Complex conjugate
-def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>;
-
-// Vector saturate without pack
-def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>;
-def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>;
-def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>;
-def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>;
-
-// Vector absolute value halfwords with and without saturation
-// Rdd64=vabsh(Rss64)[:sat]
-def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>;
-def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>;
-
-// Vector absolute value words with and without saturation
-def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
-def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
-
-//===----------------------------------------------------------------------===//
-// STYPE/BIT +
-//===----------------------------------------------------------------------===//
-// Bit count
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_COUNT_LEADING<string MnOp, bits<3> MajOp, bits<3> MinOp, bit Is32,
- dag Out, dag Inp>
- : SInst<Out, Inp, "$Rd = "#MnOp#"($Rs)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rs;
- bits<5> Rd;
- let IClass = 0b1000;
- let Inst{27} = 0b1;
- let Inst{26} = Is32;
- let Inst{25-24} = 0b00;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-class T_COUNT_LEADING_32<string MnOp, bits<3> MajOp, bits<3> MinOp>
- : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b1,
- (outs IntRegs:$Rd), (ins IntRegs:$Rs)>;
-
-class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp>
- : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0,
- (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>;
-
-def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>;
-def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>;
-def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>;
-def S2_ct1 : T_COUNT_LEADING_32<"ct1", 0b010, 0b101>;
-def S2_cl0p : T_COUNT_LEADING_64<"cl0", 0b010, 0b010>;
-def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>;
-def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
-def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
-def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
-
-// The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td.
-
-// Bit set/clear/toggle
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp>
- : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5_0Imm:$u5),
- "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> u5;
- let IClass = 0b1000;
- let Inst{27-21} = 0b1100110;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = u5;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_SCT_BIT_REG<string MnOp, bits<2> MinOp>
- : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#MnOp#"($Rs, $Rt)", [], "", S_3op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
- let IClass = 0b1100;
- let Inst{27-22} = 0b011010;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-6} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>;
-def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>;
-def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>;
-def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>;
-def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
-def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
-
-// Bit test
-
-let hasSideEffects = 0 in
-class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5_0Imm:$u5),
- "$Pd = "#MnOp#"($Rs, #$u5)",
- [], "", S_2op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> u5;
- let IClass = 0b1000;
- let Inst{27-24} = 0b0101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = 0;
- let Inst{12-8} = u5;
- let Inst{1-0} = Pd;
-}
-
-let hasSideEffects = 0 in
-class T_TEST_BIT_REG<string MnOp, bit IsNeg>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#MnOp#"($Rs, $Rt)",
- [], "", S_3op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
- let IClass = 0b1100;
- let Inst{27-22} = 0b011100;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{1-0} = Pd;
-}
-
-def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
-def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
-
-let hasSideEffects = 0 in
-class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6_0Imm:$u6),
- "$Pd = "#MnOp#"($Rs, #$u6)",
- [], "", S_2op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<6> u6;
- let IClass = 0b1000;
- let Inst{27-24} = 0b0101;
- let Inst{23-22} = MajOp;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{13-8} = u6;
- let Inst{1-0} = Pd;
-}
-
-let hasSideEffects = 0 in
-class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#MnOp#"($Rs, $Rt)",
- [], "", S_3op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
- let IClass = 0b1100;
- let Inst{27-24} = 0b0111;
- let Inst{23-22} = MajOp;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{1-0} = Pd;
-}
-
-def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>;
-def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
-def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
-
-//===----------------------------------------------------------------------===//
-// STYPE/BIT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PERM +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PERM -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/PRED +
-//===----------------------------------------------------------------------===//
-
-// Predicate transfer.
-let hasSideEffects = 0, hasNewValue = 1 in
-def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
- "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<2> Ps;
-
- let IClass = 0b1000;
- let Inst{27-24} = 0b1001;
- let Inst{22} = 0b1;
- let Inst{17-16} = Ps;
- let Inst{4-0} = Rd;
-}
-
-// Transfer general register to predicate.
-let hasSideEffects = 0 in
-def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
- "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
-
- let IClass = 0b1000;
- let Inst{27-21} = 0b0101010;
- let Inst{20-16} = Rs;
- let Inst{1-0} = Pd;
-}
-
-let hasSideEffects = 0, isCodeGenOnly = 1 in
-def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
- "$dst = $src">;
-
-//===----------------------------------------------------------------------===//
-// STYPE/PRED -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/SHIFT +
-//===----------------------------------------------------------------------===//
-class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp,
- Operand Imm, list<dag> pattern = [], bit isRnd = 0>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, Imm:$src2),
- "$dst = "#Mnemonic#"($src1, #$src2)"#!if(isRnd, ":rnd", ""),
- pattern> {
- bits<5> src1;
- bits<5> dst;
- let IClass = 0b1000;
- let Inst{27-24} = 0;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
-}
-
-class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp>
- : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6_0Imm, []> {
- bits<6> src2;
- let Inst{13-8} = src2;
-}
-
-// Shift by immediate.
-def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>;
-def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>;
-def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>;
-
-// Shift left by small amount and add.
-let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in
-def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rt, IntRegs:$Rs, u3_0Imm:$u3),
- "$Rd = addasl($Rt, $Rs, #$u3)" , [],
- "", S_3op_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rt;
- bits<5> Rs;
- bits<3> u3;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b0100000;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7-5} = u3;
- let Inst{4-0} = Rd;
- }
-
-//===----------------------------------------------------------------------===//
-// STYPE/SHIFT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/VH +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/VH -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/VW +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/VW -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SYSTEM/SUPER +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SYSTEM/USER +
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 1, isSoloAX = 1 in
-def Y2_barrier : SYSInst<(outs), (ins), "barrier", [],"",ST_tc_st_SLOT0> {
- let Inst{31-28} = 0b1010;
- let Inst{27-21} = 0b1000000;
-}
-
-//===----------------------------------------------------------------------===//
-// SYSTEM/SUPER -
-//===----------------------------------------------------------------------===//
-
-// Generate frameindex addresses. The main reason for the offset operand is
-// that every instruction that is allowed to have frame index as an operand
-// will then have that operand followed by an immediate operand (the offset).
-// This simplifies the frame-index elimination code.
-//
-let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
- isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
- def PS_fi : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$fi, s32_0Imm:$off), "">;
- def PS_fia : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">;
-}
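-
-// An illustrative reading (not a literal expansion): PS_fi stands for
-// "$Rd = <frame-index base> + #off" and PS_fia for
-// "$Rd = $Rs + <frame-index base> + #off"; frame-index elimination is then
-// expected to fold the resolved frame offset into the immediate.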
-
-//===----------------------------------------------------------------------===//
-// CRUSER - Type.
-//===----------------------------------------------------------------------===//
-// HW loop
-let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, hasSideEffects = 0 in
-class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0>
- : CRInst<(outs), (ins brOp:$offset, u10_0Imm:$src2),
- #mnemonic#"($offset, #$src2)",
- [], "" , CR_tc_3x_SLOT3> {
- bits<9> offset;
- bits<10> src2;
-
- let IClass = 0b0110;
-
- let Inst{27-22} = 0b100100;
- let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
- let Inst{20-16} = src2{9-5};
- let Inst{12-8} = offset{8-4};
- let Inst{7-5} = src2{4-2};
- let Inst{4-3} = offset{3-2};
- let Inst{1-0} = src2{1-0};
-}
-
-let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, hasSideEffects = 0 in
-class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
- : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2),
- #mnemonic#"($offset, $src2)",
- [], "" ,CR_tc_3x_SLOT3> {
- bits<9> offset;
- bits<5> src2;
-
- let IClass = 0b0110;
-
- let Inst{27-22} = 0b000000;
- let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
- let Inst{20-16} = src2;
- let Inst{12-8} = offset{8-4};
- let Inst{4-3} = offset{3-2};
- }
-
-multiclass LOOP_ri<string mnemonic> {
- def i : LOOP_iBase<mnemonic, brtarget>;
- def r : LOOP_rBase<mnemonic, brtarget>;
-
- let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
- def iext: LOOP_iBase<mnemonic, brtargetExt, 1>;
- def rext: LOOP_rBase<mnemonic, brtargetExt, 1>;
- }
-}
-
-
-let Defs = [SA0, LC0, USR] in
-defm J2_loop0 : LOOP_ri<"loop0">;
-
-// Interestingly, only loop0 appears to set usr.lpcfg.
-let Defs = [SA1, LC1] in
-defm J2_loop1 : LOOP_ri<"loop1">;
-
-let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
- Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
- ":endloop0",
- []>;
-}
-
-let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
- Defs = [PC, LC1], Uses = [SA1, LC1] in {
-def ENDLOOP1 : Endloop<(outs), (ins brtarget:$offset),
- ":endloop1",
- []>;
-}
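-
-// A minimal hardware-loop usage sketch (illustrative assembly; the label and
-// iteration count are placeholders):
-//   loop0(.LBB0_1, #10)        // J2_loop0i: run the loop 10 times
-// .LBB0_1:
-//   { ... } :endloop0          // last packet of the body, ENDLOOP0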
-
-// Pipelined loop instructions, sp[123]loop0
-let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
- isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, isPredicateLate = 1 in
-class SPLOOP_iBase<string SP, bits<2> op>
- : CRInst <(outs), (ins brtarget:$r7_2, u10_0Imm:$U10),
- "p3 = sp"#SP#"loop0($r7_2, #$U10)" > {
- bits<9> r7_2;
- bits<10> U10;
-
- let IClass = 0b0110;
-
- let Inst{22-21} = op;
- let Inst{27-23} = 0b10011;
- let Inst{20-16} = U10{9-5};
- let Inst{12-8} = r7_2{8-4};
- let Inst{7-5} = U10{4-2};
- let Inst{4-3} = r7_2{3-2};
- let Inst{1-0} = U10{1-0};
- }
-
-let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
- isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, isPredicateLate = 1 in
-class SPLOOP_rBase<string SP, bits<2> op>
- : CRInst <(outs), (ins brtarget:$r7_2, IntRegs:$Rs),
- "p3 = sp"#SP#"loop0($r7_2, $Rs)" > {
- bits<9> r7_2;
- bits<5> Rs;
-
- let IClass = 0b0110;
-
- let Inst{22-21} = op;
- let Inst{27-23} = 0b00001;
- let Inst{20-16} = Rs;
- let Inst{12-8} = r7_2{8-4};
- let Inst{4-3} = r7_2{3-2};
- }
-
-multiclass SPLOOP_ri<string mnemonic, bits<2> op> {
- def i : SPLOOP_iBase<mnemonic, op>;
- def r : SPLOOP_rBase<mnemonic, op>;
-}
-
-defm J2_ploop1s : SPLOOP_ri<"1", 0b01>;
-defm J2_ploop2s : SPLOOP_ri<"2", 0b10>;
-defm J2_ploop3s : SPLOOP_ri<"3", 0b11>;
-
-// if (Rs[!=,>=,==,<=]#0) jump:[t/nt]
-let Defs = [PC], isPredicated = 1, isBranch = 1, hasSideEffects = 0,
- hasSideEffects = 0 in
-class J2_jump_0_Base<string compare, bit isTak, bits<2> op>
- : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2),
- "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > {
- bits<5> Rs;
- bits<15> r13_2;
-
- let IClass = 0b0110;
-
- let Inst{27-24} = 0b0001;
- let Inst{23-22} = op;
- let Inst{12} = isTak;
- let Inst{21} = r13_2{14};
- let Inst{20-16} = Rs;
- let Inst{11-1} = r13_2{12-2};
- let Inst{13} = r13_2{13};
- }
-
-multiclass J2_jump_compare_0<string compare, bits<2> op> {
- def NAME : J2_jump_0_Base<compare, 0, op>;
- def NAME#pt : J2_jump_0_Base<compare, 1, op>;
-}
-
-defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>;
-defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>;
-defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>;
-defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>;
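-
-// For example (derived from the template string above; the register and label
-// are placeholders):
-//   J2_jumprzpt   prints  if (r3!=#0) jump:t .LBB0_2
-//   J2_jumprltez  prints  if (r3<=#0) jump:nt .LBB0_2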
-
-// Transfers between control registers (or guest control registers) and GPRs.
-let hasSideEffects = 0 in
-class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
- : CRInst <(outs CTRC:$dst), (ins RC:$src),
- "$dst = $src", [], "", CR_tc_3x_SLOT3> {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b0110;
-
- let Inst{27-25} = 0b001;
- let Inst{24} = isDouble;
- let Inst{23-21} = 0b001;
- let Inst{20-16} = src;
- let Inst{4-0} = dst;
- }
-
-def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>;
-def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>;
-def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
-def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
-
-let hasSideEffects = 0 in
-class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle>
- : CRInst <(outs RC:$dst), (ins CTRC:$src),
- "$dst = $src", [], "", CR_tc_3x_SLOT3> {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b0110;
-
- let Inst{27-26} = 0b10;
- let Inst{25} = isSingle;
- let Inst{24-21} = 0b0000;
- let Inst{20-16} = src;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1, opNewValue = 0 in
-def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>;
-def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>;
-def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
-def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
-
-// Y4_trace: Send value to etm trace.
-let isSoloAX = 1, hasSideEffects = 0 in
-def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
- "trace($Rs)"> {
- bits<5> Rs;
-
- let IClass = 0b0110;
- let Inst{27-21} = 0b0010010;
- let Inst{20-16} = Rs;
- }
-
-// HI/LO Instructions
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- hasNewValue = 1, opNewValue = 0 in
-class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp>
- : ALU32_ri<(outs IntRegs:$dst),
- (ins u16_0Imm:$imm_value),
- "$dst"#RegHalf#" = $imm_value", []> {
- bits<5> dst;
- bits<32> imm_value;
- let IClass = 0b0111;
-
- let Inst{27} = Rs;
- let Inst{26-24} = MajOp;
- let Inst{21} = MinOp;
- let Inst{20-16} = dst;
- let Inst{23-22} = imm_value{15-14};
- let Inst{13-0} = imm_value{13-0};
-}
-
-let isAsmParserOnly = 1 in {
- def LO : REG_IMMED<".l", 0b0, 0b001, 0b1>;
- def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>;
-}
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in {
- def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v),
- "$Rd = CONST32(#$v)", []>;
- def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v),
- "$Rd = CONST64(#$v)", []>;
-}
-
-let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
- isCodeGenOnly = 1 in
-def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>;
-
-let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
- isCodeGenOnly = 1 in
-def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>;
-
-let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- ".error \"should not emit\" ", []>;
-
-let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- ".error \"should not emit\" ", []>;
-
-// Call subroutine indirectly.
-let Defs = VolatileV3.Regs in
-def J2_callr : JUMPR_MISC_CALLR<0, 1>;
-
-// Indirect tail-call.
-let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
- isTerminator = 1, isCodeGenOnly = 1 in
-def PS_tailcall_r : T_JMPr;
-
-// Direct tail-calls.
-let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
- isTerminator = 1, isCodeGenOnly = 1 in
-def PS_tailcall_i : JInst<(outs), (ins calltarget:$dst), "", []>;
-
-// The reason for the custom inserter is to record all ALLOCA instructions
-// in MachineFunctionInfo.
-let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in
-def PS_alloca: ALU32Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u32_0Imm:$A), "", []>;
-
-let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
-def PS_aligna : ALU32Inst<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>;
-
-// XTYPE/SHIFT
-//
-//===----------------------------------------------------------------------===//
-// Template Class
-// Shift by immediate/register and accumulate/logical
-//===----------------------------------------------------------------------===//
-
-// Rx[+-&|]=asr(Rs,#u5)
-// Rx[+-&|^]=lsr(Rs,#u5)
-// Rx[+-&|^]=asl(Rs,#u5)
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp>
- : SInst_acc<(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, u5_0Imm:$u5),
- "$Rx "#opc2#opc1#"($Rs, #$u5)", [],
- "$src1 = $Rx", S_2op_tc_2_SLOT23> {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> u5;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b1110;
- let Inst{23-22} = majOp{2-1};
- let Inst{13} = 0b0;
- let Inst{7} = majOp{0};
- let Inst{6-5} = minOp;
- let Inst{4-0} = Rx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = u5;
- }
-
-// Rx[+-&|]=asr(Rs,Rt)
-// Rx[+-&|^]=lsr(Rs,Rt)
-// Rx[+-&|^]=asl(Rs,Rt)
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<2> majOp, bits<2> minOp>
- : SInst_acc<(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#opc2#opc1#"($Rs, $Rt)", [],
- "$src1 = $Rx", S_3op_tc_2_SLOT23 > {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b1100;
- let Inst{23-22} = majOp;
- let Inst{7-6} = minOp;
- let Inst{4-0} = Rx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
-
-// Rxx[+-&|]=asr(Rss,#u6)
-// Rxx[+-&|^]=lsr(Rss,#u6)
-// Rxx[+-&|^]=asl(Rss,#u6)
-
-class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp>
- : SInst_acc<(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6_0Imm:$u6),
- "$Rxx "#opc2#opc1#"($Rss, #$u6)", [],
- "$src1 = $Rxx", S_2op_tc_2_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<6> u6;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b0010;
- let Inst{23-22} = majOp{2-1};
- let Inst{7} = majOp{0};
- let Inst{6-5} = minOp;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rss;
- let Inst{13-8} = u6;
- }
-
-
-// Rxx[+-&|]=asr(Rss,Rt)
-// Rxx[+-&|^]=lsr(Rss,Rt)
-// Rxx[+-&|^]=asl(Rss,Rt)
-// Rxx[+-&|^]=lsl(Rss,Rt)
-
-class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp>
- : SInst_acc<(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rxx "#opc2#opc1#"($Rss, $Rt)", [],
- "$src1 = $Rxx", S_3op_tc_2_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = majOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rt;
- let Inst{7-6} = minOp;
- let Inst{4-0} = Rxx;
- }
-
-//===----------------------------------------------------------------------===//
-// Multi-class for the shift instructions with logical/arithmetic operators.
-//===----------------------------------------------------------------------===//
-
-multiclass xtype_imm_base<string OpcStr1, string OpcStr2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp > {
- def _i_r#NAME : T_shift_imm_acc_r< OpcStr1, OpcStr2, OpNode1,
- OpNode2, majOp, minOp >;
- def _i_p#NAME : T_shift_imm_acc_p< OpcStr1, OpcStr2, OpNode1,
- OpNode2, majOp, minOp >;
-}
-
-multiclass xtype_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
- let AddedComplexity = 100 in
- defm _acc : xtype_imm_base< opc1, "+= ", OpNode, add, 0b001, minOp>;
-
- defm _nac : xtype_imm_base< opc1, "-= ", OpNode, sub, 0b000, minOp>;
- defm _and : xtype_imm_base< opc1, "&= ", OpNode, and, 0b010, minOp>;
- defm _or : xtype_imm_base< opc1, "|= ", OpNode, or, 0b011, minOp>;
-}
-
-multiclass xtype_xor_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
-let AddedComplexity = 100 in
- defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>;
-}
-
-defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>;
-
-defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
- xtype_xor_imm_acc<"lsr", srl, 0b01>;
-
-defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>,
- xtype_xor_imm_acc<"asl", shl, 0b10>;
-
-multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> {
- let AddedComplexity = 100 in
- def _acc : T_shift_reg_acc_r <opc1, "+= ", OpNode, add, 0b11, minOp>;
-
- def _nac : T_shift_reg_acc_r <opc1, "-= ", OpNode, sub, 0b10, minOp>;
- def _and : T_shift_reg_acc_r <opc1, "&= ", OpNode, and, 0b01, minOp>;
- def _or : T_shift_reg_acc_r <opc1, "|= ", OpNode, or, 0b00, minOp>;
-}
-
-multiclass xtype_reg_acc_p<string opc1, SDNode OpNode, bits<2>minOp> {
- let AddedComplexity = 100 in
- def _acc : T_shift_reg_acc_p <opc1, "+= ", OpNode, add, 0b110, minOp>;
-
- def _nac : T_shift_reg_acc_p <opc1, "-= ", OpNode, sub, 0b100, minOp>;
- def _and : T_shift_reg_acc_p <opc1, "&= ", OpNode, and, 0b010, minOp>;
- def _or : T_shift_reg_acc_p <opc1, "|= ", OpNode, or, 0b000, minOp>;
- def _xor : T_shift_reg_acc_p <opc1, "^= ", OpNode, xor, 0b011, minOp>;
-}
-
-multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > {
- defm _r_r : xtype_reg_acc_r <OpcStr, OpNode, minOp>;
- defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>;
-}
-
-defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>;
-defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>;
-defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>;
-defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>;
-
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_S3op_1 <string mnemonic, RegisterClass RC, bits<2> MajOp, bits<3> MinOp,
- bit SwapOps, bit isSat = 0, bit isRnd = 0, bit hasShift = 0>
- : SInst <(outs RC:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = "#mnemonic#"($src1, $src2)"#!if(isRnd, ":rnd", "")
- #!if(hasShift,":>>1","")
- #!if(isSat, ":sat", ""),
- [], "", S_3op_tc_2_SLOT23 > {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0001;
- let Inst{23-22} = MajOp;
- let Inst{20-16} = !if (SwapOps, src2, src1);
- let Inst{12-8} = !if (SwapOps, src1, src2);
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps,
- bit isSat = 0, bit isRnd = 0, bit hasShift = 0 >
- : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps,
- isSat, isRnd, hasShift>;
-
-let Itinerary = S_3op_tc_1_SLOT23 in {
- def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>;
- def S2_shuffeh : T_S3op_64 < "shuffeh", 0b00, 0b110, 0>;
- def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>;
- def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>;
-
- def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>;
- def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>;
-}
-
-def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>;
-
-let hasSideEffects = 0 in
-class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps>
- : SInst < (outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
- "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)",
- [], "", S_3op_tc_1_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
- bits<2> Pu;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = !if (SwapOps, Rtt, Rss);
- let Inst{12-8} = !if (SwapOps, Rss, Rtt);
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rdd;
- }
-
-def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>;
-def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template class used by vector shift, vector rotate, vector neg,
-// 32-bit shift, 64-bit shifts, etc.
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0 in
-class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp,
- bits<2> MinOp, bit isSat = 0, list<dag> pattern = [] >
- : SInst <(outs RC:$dst),
- (ins RC:$src1, IntRegs:$src2),
- "$dst = "#mnemonic#"($src1, $src2)"#!if(isSat, ":sat", ""),
- pattern, "", S_3op_tc_1_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b0110, 0b0011);
- let Inst{23-22} = MajOp;
- let Inst{20-16} = src1;
- let Inst{12-8} = src2;
- let Inst{7-6} = MinOp;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1 in
-class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, []>;
-
-let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in
-class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp>
- : T_S3op_3 <mnemonic, IntRegs, 0b00, MinOp, 1, []>;
-
-
-class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, []>;
-
-
-class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
- : T_S3op_3 <mnemonic, DoubleRegs, MajOp, MinOp, 0, []>;
-
-
-// Shift by register
-// Rdd=[asr|lsr|asl|lsl](Rss,Rt)
-
-def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>;
-def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>;
-def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>;
-def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>;
-
-// Rd=[asr|lsr|asl|lsl](Rs,Rt)
-
-def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>;
-def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>;
-def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>;
-def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>;
-
-// Shift by register with saturation
-// Rd=asr(Rs,Rt):sat
-// Rd=asl(Rs,Rt):sat
-
-let Defs = [USR_OVF] in {
- def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>;
- def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd, bit hasShift, bit hasSplat = 0>
- : SInst < (outs IntRegs:$Rd),
- (ins DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rd = "#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")"
- #!if(hasShift, ":<<1", "")
- #!if(isRnd, ":rnd", "")
- #!if(isSat, ":sat", ""),
- [], "", S_3op_tc_1_SLOT23 > {
- bits<5> Rd;
- bits<5> Rss;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0101;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>;
-
-let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
-def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>;
-
-let hasSideEffects = 0 in
-class T_S3op_7 <string mnemonic, bit MajOp >
- : SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3_0Imm:$u3),
- "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" ,
- [], "", S_3op_tc_1_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
- bits<3> u3;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0000;
- let Inst{23} = MajOp;
- let Inst{20-16} = !if(MajOp, Rss, Rtt);
- let Inst{12-8} = !if(MajOp, Rtt, Rss);
- let Inst{7-5} = u3;
- let Inst{4-0} = Rdd;
- }
-
-def S2_valignib : T_S3op_7 < "valignb", 0>;
-def S2_vspliceib : T_S3op_7 < "vspliceb", 1>;
-
-//===----------------------------------------------------------------------===//
-// Template class for 'insert bitfield' instructions
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_S3op_insert <string mnemonic, RegisterClass RC>
- : SInst <(outs RC:$dst),
- (ins RC:$src1, RC:$src2, DoubleRegs:$src3),
- "$dst = "#mnemonic#"($src2, $src3)" ,
- [], "$src1 = $dst", S_3op_tc_1_SLOT23 > {
- bits<5> dst;
- bits<5> src2;
- bits<5> src3;
-
- let IClass = 0b1100;
-
- let Inst{27-26} = 0b10;
- let Inst{25-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b00, 0b10);
- let Inst{23} = 0b0;
- let Inst{20-16} = src2;
- let Inst{12-8} = src3;
- let Inst{4-0} = dst;
- }
-
-let hasSideEffects = 0 in
-class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
- : SInst <(outs RC:$dst), (ins RC:$dst2, RC:$src1, ImmOp:$src2, ImmOp:$src3),
- "$dst = insert($src1, #$src2, #$src3)",
- [], "$dst2 = $dst", S_2op_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<6> src2;
- bits<6> src3;
- bit bit23;
- bit bit13;
- string ImmOpStr = !cast<string>(ImmOp);
-
- let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5}, 0);
- let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0);
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23} = bit23;
- let Inst{22-21} = src3{4-3};
- let Inst{20-16} = src1;
- let Inst{13} = bit13;
- let Inst{12-8} = src2{4-0};
- let Inst{7-5} = src3{2-0};
- let Inst{4-0} = dst;
- }
-
-// Rx=insert(Rs,Rtt)
-// Rx=insert(Rs,#u5,#U5)
-let hasNewValue = 1 in {
- def S2_insert_rp : T_S3op_insert <"insert", IntRegs>;
- def S2_insert : T_S2op_insert <0b1111, IntRegs, u5_0Imm>;
-}
-
-// Rxx=insert(Rss,Rtt)
-// Rxx=insert(Rss,#u6,#U6)
-def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
-def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6_0Imm>;
-
-
-//===----------------------------------------------------------------------===//
-// Template class for 'extract bitfield' instructions
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_S3op_extract <string mnemonic, bits<2> MinOp>
- : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
- "$Rd = "#mnemonic#"($Rs, $Rtt)",
- [], "", S_3op_tc_2_SLOT23 > {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rtt;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b100100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rtt;
- let Inst{7-6} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-let hasSideEffects = 0 in
-class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
- RegisterClass RC, Operand ImmOp>
- : SInst <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2, ImmOp:$src3),
- "$dst = "#mnemonic#"($src1, #$src2, #$src3)",
- [], "", S_2op_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<6> src2;
- bits<6> src3;
- bit bit23;
- bit bit13;
- string ImmOpStr = !cast<string>(ImmOp);
-
- let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5},
- !if (!eq(mnemonic, "extractu"), 0, 1));
-
- let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0);
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23} = bit23;
- let Inst{22-21} = src3{4-3};
- let Inst{20-16} = src1;
- let Inst{13} = bit13;
- let Inst{12-8} = src2{4-0};
- let Inst{7-5} = src3{2-0};
- let Inst{4-0} = dst;
- }
-
-// Extract bitfield
-
-// Rdd=extractu(Rss,Rtt)
-// Rdd=extractu(Rss,#u6,#U6)
-def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>;
-def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6_0Imm>;
-
-// Rd=extractu(Rs,Rtt)
-// Rd=extractu(Rs,#u5,#U5)
-let hasNewValue = 1 in {
- def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>;
- def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5_0Imm>;
-}
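A short note on how the single T_S2op_extract template serves both widths: for
the u6_0Imm (register-pair) form the sixth bits of the width and offset
immediates are routed to Inst{13} and Inst{23}, while for the u5_0Imm form
those slots hold constants instead, with Inst{23} set to 0 when the mnemonic is
"extractu" and 1 otherwise. In either case src2{4-0} fills Inst{12-8} and src3
is split across Inst{22-21} and Inst{7-5}.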
-
-//===----------------------------------------------------------------------===//
-// :raw form of tableidx[bdhw] insns
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class tableidxRaw<string OpStr, bits<2>MinOp>
- : SInst <(outs IntRegs:$Rx),
- (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, s6_0Imm:$S6),
- "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw",
- [], "$Rx = $_dst_" > {
- bits<5> Rx;
- bits<5> Rs;
- bits<4> u4;
- bits<6> S6;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b0111;
- let Inst{23-22} = MinOp;
- let Inst{21} = u4{3};
- let Inst{20-16} = Rs;
- let Inst{13-8} = S6;
- let Inst{7-5} = u4{2-0};
- let Inst{4-0} = Rx;
- }
-
-def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>;
-def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>;
-def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
-def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
-
-//===----------------------------------------------------------------------===//
-// Template class for 'table index' instructions which are assembler mapped
-// to their :raw format.
-//===----------------------------------------------------------------------===//
-let isPseudo = 1 in
-class tableidx_goodsyntax <string mnemonic>
- : SInst <(outs IntRegs:$Rx),
- (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, u5_0Imm:$u5),
- "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
- [], "$Rx = $_dst_" >;
-
-def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">;
-def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">;
-def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">;
-def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">;
-
-//===----------------------------------------------------------------------===//
-// V3 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV3.td"
-
-//===----------------------------------------------------------------------===//
-// V3 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// V4 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV4.td"
-
-//===----------------------------------------------------------------------===//
-// V4 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// V5 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV5.td"
-
-//===----------------------------------------------------------------------===//
-// V5 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// V60 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV60.td"
-
-//===----------------------------------------------------------------------===//
-// V60 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU32/64/Vector +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoVector.td"
-
-include "HexagonInstrAlias.td"
-include "HexagonSystemInst.td"
-
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
deleted file mode 100644
index 225f94405076..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ /dev/null
@@ -1,215 +0,0 @@
-//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V3 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// J +
-//===----------------------------------------------------------------------===//
-// Call subroutine.
-let isCall = 1, hasSideEffects = 1, isPredicable = 1,
- isExtended = 0, isExtendable = 1, opExtendable = 0,
- isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
-class T_Call<bit CSR, string ExtStr>
- : JInst<(outs), (ins calltarget:$dst),
- "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> {
- let BaseOpcode = "call";
- bits<24> dst;
-
- let Defs = !if (CSR, VolatileV3.Regs, []);
- let IClass = 0b0101;
- let Inst{27-25} = 0b101;
- let Inst{24-16,13-1} = dst{23-2};
- let Inst{0} = 0b0;
-}
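Reading the bit slices above: the 24-bit PC-relative call target is stored with
its two low bits dropped (call targets are 4-byte aligned, matching
opExtentAlign = 2), with dst{23-2} spread across Inst{24-16} and Inst{13-1}. A
hypothetical byte displacement of 64 would therefore be encoded as 64 >> 2 = 16.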
-
-let isCall = 1, hasSideEffects = 1, isPredicated = 1,
- isExtended = 0, isExtendable = 1, opExtendable = 1,
- isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in
-class T_CallPred<bit CSR, bit IfTrue, string ExtStr>
- : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst),
- CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst",
- [], "", J_tc_2early_SLOT23> {
- let BaseOpcode = "call";
- let isPredicatedFalse = !if(IfTrue,0,1);
- bits<2> Pu;
- bits<17> dst;
-
- let Defs = !if (CSR, VolatileV3.Regs, []);
- let IClass = 0b0101;
- let Inst{27-24} = 0b1101;
- let Inst{23-22,20-16,13,7-1} = dst{16-2};
- let Inst{21} = !if(IfTrue,0,1);
- let Inst{11} = 0b0;
- let Inst{9-8} = Pu;
-}
-
-multiclass T_Calls<bit CSR, string ExtStr> {
- def NAME : T_Call<CSR, ExtStr>;
- def t : T_CallPred<CSR, 1, ExtStr>;
- def f : T_CallPred<CSR, 0, ExtStr>;
-}
-
-defm J2_call: T_Calls<1, "">, PredRel;
-
-let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
- Defs = VolatileV3.Regs in
-def PS_call_nr : T_Call<1, "">, PredRel;
-
-let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
- Defs = [PC, R31, R6, R7, P0] in
-def PS_call_stk : T_Call<0, "">, PredRel;
-
-//===----------------------------------------------------------------------===//
-// J -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// JR +
-//===----------------------------------------------------------------------===//
-// Call subroutine from register.
-
-let isCodeGenOnly = 1, Defs = VolatileV3.Regs in {
- def PS_callr_nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
-}
-
-//===----------------------------------------------------------------------===//
-// JR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU +
-//===----------------------------------------------------------------------===//
-
-let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in
-def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>;
-
-class T_ALU64_addsp_hl<string suffix, bits<3> MinOp>
- : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">;
-
-def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>;
-def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>;
-
-let hasSideEffects = 0, isAsmParserOnly = 1 in
-def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", [],
- "", ALU64_tc_1_SLOT23>;
-
-
-let hasSideEffects = 0 in
-class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned>
- : ALU64Inst<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rt, DoubleRegs:$Rs),
- "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
- #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00111;
- let Inst{22-21} = !if(isMax, 0b10, 0b01);
- let Inst{20-16} = !if(isMax, Rt, Rs);
- let Inst{12-8} = !if(isMax, Rs, Rt);
- let Inst{7} = 0b1;
- let Inst{6} = !if(isMax, 0b0, 0b1);
- let Inst{5} = isUnsigned;
- let Inst{4-0} = Rd;
-}
-
-def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>;
-def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
-def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
-def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// :raw form of vrcmpys:hi/lo insns
-//===----------------------------------------------------------------------===//
-// Vector reduce complex multiply by scalar.
-let Defs = [USR_OVF], hasSideEffects = 0 in
-class T_vrcmpRaw<string HiLo, bits<3>MajOp>:
- MInst<(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{7-5} = 0b100;
- let Inst{4-0} = Rdd;
-}
-
-def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>;
-def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>;
-
-// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l
-let hasSideEffects = 0, isAsmParserOnly = 1 in
-def M2_vrcmpys_s1
- : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">;
-
-// Vector reduce complex multiply by scalar with accumulation.
-let Defs = [USR_OVF], hasSideEffects = 0 in
-class T_vrcmpys_acc<string HiLo, bits<3>MajOp>:
- MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [],
- "$Rxx = $_src_"> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{7-5} = 0b100;
- let Inst{4-0} = Rxx;
- }
-
-def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>;
-def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>;
-
-// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l
-
-let isAsmParserOnly = 1 in
-def M2_vrcmpys_acc_s1
- : MInst <(outs DoubleRegs:$dst),
- (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
- "$dst += vrcmpys($src1, $src2):<<1:sat", [],
- "$dst2 = $dst">;
-
-def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>;
-def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>;
-
-// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l
-let isAsmParserOnly = 1 in
-def M2_vrcmpys_s1rp
- : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">;
-
-
-// S2_cabacdecbin: Cabac decode bin.
-let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in
-def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
deleted file mode 100644
index 18943a082d28..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ /dev/null
@@ -1,3301 +0,0 @@
-//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V4 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-def DuplexIClass0: InstDuplex < 0 >;
-def DuplexIClass1: InstDuplex < 1 >;
-def DuplexIClass2: InstDuplex < 2 >;
-let isExtendable = 1 in {
- def DuplexIClass3: InstDuplex < 3 >;
- def DuplexIClass4: InstDuplex < 4 >;
- def DuplexIClass5: InstDuplex < 5 >;
- def DuplexIClass6: InstDuplex < 6 >;
- def DuplexIClass7: InstDuplex < 7 >;
-}
-def DuplexIClass8: InstDuplex < 8 >;
-def DuplexIClass9: InstDuplex < 9 >;
-def DuplexIClassA: InstDuplex < 0xA >;
-def DuplexIClassB: InstDuplex < 0xB >;
-def DuplexIClassC: InstDuplex < 0xC >;
-def DuplexIClassD: InstDuplex < 0xD >;
-def DuplexIClassE: InstDuplex < 0xE >;
-def DuplexIClassF: InstDuplex < 0xF >;
-
-let hasSideEffects = 0 in
-class T_Immext<Operand ImmType>
- : EXTENDERInst<(outs), (ins ImmType:$imm),
- "immext(#$imm)", []> {
- bits<32> imm;
- let IClass = 0b0000;
-
- let Inst{27-16} = imm{31-20};
- let Inst{13-0} = imm{19-6};
- }
-
-def A4_ext : T_Immext<u26_6Imm>;
-let isCodeGenOnly = 1 in {
- let isBranch = 1 in
- def A4_ext_b : T_Immext<brtarget>;
- let isCall = 1 in
- def A4_ext_c : T_Immext<calltarget>;
- def A4_ext_g : T_Immext<globaladdress>;
-}
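A brief reading of T_Immext: the extender word carries imm{31-6} of the
extended constant, 12 bits in Inst{27-16} and 14 bits in Inst{13-0}; the
instruction that follows the extender supplies only the low 6 bits.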
-
-// The Hexagon V4 architecture spec groups instructions into these classes:
-// LD, ST, ALU32, XTYPE, J, JR, MEMOP, NV, CR and SYSTEM (SYSTEM is not
-// implemented in the compiler).
-
-// LD Instructions:
-// ========================================
-// Loads (8/16/32/64 bit)
-// Deallocframe
-
-// ST Instructions:
-// ========================================
-// Stores (8/16/32/64 bit)
-// Allocframe
-
-// ALU32 Instructions:
-// ========================================
-// Arithmetic / Logical (32 bit)
-// Vector Halfword
-
-// XTYPE Instructions (32/64 bit):
-// ========================================
-// Arithmetic, Logical, Bit Manipulation
-// Multiply (Integer, Fractional, Complex)
-// Permute / Vector Permute Operations
-// Predicate Operations
-// Shift / Shift with Add/Sub/Logical
-// Vector Byte ALU
-// Vector Halfword (ALU, Shift, Multiply)
-// Vector Word (ALU, Shift)
-
-// J Instructions:
-// ========================================
-// Jump/Call PC-relative
-
-// JR Instructions:
-// ========================================
-// Jump/Call Register
-
-// MEMOP Instructions:
-// ========================================
-// Operation on memory (8/16/32 bit)
-
-// NV Instructions:
-// ========================================
-// New-value Jumps
-// New-value Stores
-
-// CR Instructions:
-// ========================================
-// Control-Register Transfers
-// Hardware Loop Setup
-// Predicate Logicals & Reductions
-
-// SYSTEM Instructions (not implemented in the compiler):
-// ========================================
-// Prefetch
-// Cache Maintenance
-// Bus Operations
-
-
-//===----------------------------------------------------------------------===//
-// ALU32 +
-//===----------------------------------------------------------------------===//
-
-class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev>
- : T_ALU32_3op<mnemonic, MajOp, MinOp, OpsRev, 0> {
- let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)";
-}
-
-let BaseOpcode = "andn_rr", CextOpcode = "andn" in
-def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>;
-let BaseOpcode = "orn_rr", CextOpcode = "orn" in
-def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>;
-
-let CextOpcode = "rcmp.eq" in
-def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>;
-let CextOpcode = "!rcmp.eq" in
-def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>;
-
-def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>;
-def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
-def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
-
-class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
- ImmRegRel {
- let InputType = "reg";
- let CextOpcode = mnemonic;
- let isCompare = 1;
- let isCommutable = IsComm;
- let hasSideEffects = 0;
-
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1100;
- let Inst{27-21} = 0b0111110;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{1-0} = Pd;
-}
-
-def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>;
-def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>;
-def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>;
-def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>;
-def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
-def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
-
-class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
- Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits>
- : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
- "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>,
- ImmRegRel {
- let InputType = "imm";
- let CextOpcode = mnemonic;
- let isCompare = 1;
- let isCommutable = IsComm;
- let hasSideEffects = 0;
- let isExtendable = IsImmExt;
- let opExtendable = !if (IsImmExt, 2, 0);
- let isExtentSigned = IsImmSigned;
- let opExtentBits = ImmBits;
-
- bits<2> Pd;
- bits<5> Rs;
- bits<8> Imm;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b1101;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-5} = Imm;
- let Inst{4} = 0b0;
- let Inst{3} = IsHalf;
- let Inst{1-0} = Pd;
-}
-
-def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8_0Imm, 0, 0, 8>;
-def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8_0Imm, 0, 1, 8>;
-def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7_0Ext, 1, 0, 7>;
-def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8_0Ext, 1, 1, 8>;
-def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8_0Ext, 1, 1, 8>;
-def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7_0Ext, 1, 0, 7>;
-
-class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
- : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8_0Ext:$s8),
- "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>,
- ImmRegRel {
- let InputType = "imm";
- let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq");
- let isExtendable = 1;
- let opExtendable = 2;
- let isExtentSigned = 1;
- let opExtentBits = 8;
- let hasNewValue = 1;
-
- bits<5> Rd;
- bits<5> Rs;
- bits<8> s8;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b0011;
- let Inst{22} = 0b1;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b1;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
-}
-
-def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
-def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-
-//===----------------------------------------------------------------------===//
-// ALU32 -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM +
-//===----------------------------------------------------------------------===//
-
-// Combine a word and an immediate into a register pair.
-let hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1,
- opExtentBits = 8 in
-class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr>
- : ALU32Inst <(outs DoubleRegs:$Rdd), ins, AsmStr> {
- bits<5> Rdd;
- bits<5> Rs;
- bits<8> s8;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b0011;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b1;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rdd;
- }
-
-let opExtendable = 2 in
-def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8_0Ext:$s8),
- "$Rdd = combine($Rs, #$s8)">;
-
-let opExtendable = 1 in
-def A4_combineir : T_Combine1<0b01, (ins s8_0Ext:$s8, IntRegs:$Rs),
- "$Rdd = combine(#$s8, $Rs)">;
-
-// A4_combineii: Set two small immediates.
-let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
-def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8_0Imm:$s8, u6_0Ext:$U6),
- "$Rdd = combine(#$s8, #$U6)"> {
- bits<5> Rdd;
- bits<8> s8;
- bits<6> U6;
-
- let IClass = 0b0111;
- let Inst{27-23} = 0b11001;
- let Inst{20-16} = U6{5-1};
- let Inst{13} = U6{0};
- let Inst{12-5} = s8;
- let Inst{4-0} = Rdd;
- }
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// LD +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Template class for load instructions with Absolute set addressing mode.
-//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet,
- hasSideEffects = 0 in
-class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>:
- LDInst<(outs RC:$dst1, IntRegs:$dst2),
- (ins u6_0Ext:$addr),
- "$dst1 = "#mnemonic#"($dst2 = #$addr)",
- []> {
- bits<7> name;
- bits<5> dst1;
- bits<5> dst2;
- bits<6> addr;
-
- let IClass = 0b1001;
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{13-12} = 0b01;
- let Inst{4-0} = dst1;
- let Inst{20-16} = dst2;
- let Inst{11-8} = addr{5-2};
- let Inst{6-5} = addr{1-0};
-}
-
-let accessSize = ByteAccess, hasNewValue = 1 in {
- def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>;
- def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>;
-}
-
-let accessSize = HalfWordAccess, hasNewValue = 1 in {
- def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>;
- def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>;
- def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>;
- def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>;
-}
-
-let accessSize = WordAccess, hasNewValue = 1 in
- def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>;
-
-let accessSize = WordAccess in {
- def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>;
- def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>;
-}
-
-let accessSize = DoubleWordAccess in
-def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>;
-
-let accessSize = ByteAccess in
- def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>;
-
-let accessSize = HalfWordAccess in
-def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>;
-
-// Load - Indirect with long offset
-let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1,
-opExtentBits = 6, opExtendable = 3 in
-class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC,
- bits<4> MajOp>
- : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3),
- "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)",
- [] >, ImmRegShl {
- bits<5> dst;
- bits<5> src1;
- bits<2> src2;
- bits<6> src3;
- let CextOpcode = CextOp;
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
-
- let IClass = 0b1001;
- let Inst{27-25} = 0b110;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2{1};
- let Inst{12} = 0b1;
- let Inst{11-8} = src3{5-2};
- let Inst{7} = src2{0};
- let Inst{6-5} = src3{1-0};
- let Inst{4-0} = dst;
- }
-
-let accessSize = ByteAccess in {
- def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>;
- def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>;
- def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo",
- DoubleRegs, 0b0100>;
-}
-
-let accessSize = HalfWordAccess in {
- def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>;
- def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>;
- def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>;
- def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>;
- def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo",
- DoubleRegs, 0b0010>;
-}
-
-let accessSize = WordAccess in {
- def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>;
- def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>;
- def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>;
-}
-
-let accessSize = DoubleWordAccess in
-def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>;
-
-
-//===----------------------------------------------------------------------===//
-// Template classes for the non-predicated load instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>:
- LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2_0Imm:$u2),
- "$dst = "#mnemonic#"($src1 + $src2<<#$u2)",
- [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<2> u2;
-
- let IClass = 0b0011;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{12-8} = src2;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the predicated load instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicated = 1 in
-class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit isNot, bit isPredNew>:
- LDInst <(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2_0Imm:$u2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)",
- [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<5> src3;
- bits<2> u2;
-
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
-
- let IClass = 0b0011;
-
- let Inst{27-26} = 0b00;
- let Inst{25} = isPredNew;
- let Inst{24} = isNot;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{12-8} = src3;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{6-5} = src1;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// multiclass for load instructions with base + register offset
-// addressing mode
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = BaseRegOffset in
-multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC,
- bits<3> MajOp > {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl,
- InputType = "reg" in {
- let isPredicable = 1 in
- def L4_#NAME#_rr : T_load_rr <mnemonic, RC, MajOp>;
-
- // Predicated
- def L4_p#NAME#t_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 0>;
- def L4_p#NAME#f_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 0>;
-
- // Predicated new
- def L4_p#NAME#tnew_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 1>;
- def L4_p#NAME#fnew_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 1>;
- }
-}
-
-let hasNewValue = 1, accessSize = ByteAccess in {
- defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>;
- defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>;
-}
-
-let hasNewValue = 1, accessSize = HalfWordAccess in {
- defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>;
- defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>;
-}
-
-let hasNewValue = 1, accessSize = WordAccess in
-defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>;
-
-let accessSize = DoubleWordAccess in
-defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
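Addressing note for the ld_idxd_shl family (register values below are
hypothetical): the printed form "$dst = memX($src1 + $src2<<#$u2)" means the
effective address is the base Rs plus the index Rt scaled by the 2-bit shift,
so "r0 = memw(r2 + r3<<#2)" loads a word from r2 + 4*r3.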
-
-//===----------------------------------------------------------------------===//
-// LD -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ST +
-//===----------------------------------------------------------------------===//
-///
-//===----------------------------------------------------------------------===//
-// Template class for store instructions with Absolute set addressing mode.
-//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 1, opExtentBits = 6,
- addrMode = AbsoluteSet in
-class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
- bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
- : STInst<(outs IntRegs:$dst),
- (ins u6_0Ext:$addr, RC:$src),
- mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel {
- bits<5> dst;
- bits<6> addr;
- bits<5> src;
- let accessSize = AccessSz;
- let BaseOpcode = BaseOp#"_AbsSet";
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = dst;
- let Inst{13} = 0b0;
- let Inst{12-8} = src;
- let Inst{7} = 0b1;
- let Inst{5-0} = addr;
- }
-
-def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
-def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010,
- HalfWordAccess>;
-def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>;
-
-let isNVStorable = 0 in {
- def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs,
- 0b011, HalfWordAccess, 1>;
- def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs,
- 0b110, DoubleWordAccess>;
-}
-
-let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2,
-isExtended = 1, opExtentBits= 6 in
-class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp,
- MemAccessSize AccessSz >
- : NVInst <(outs IntRegs:$dst),
- (ins u6_0Ext:$addr, IntRegs:$src),
- mnemonic#"($dst = #$addr) = $src.new">, NewValueRel {
- bits<5> dst;
- bits<6> addr;
- bits<3> src;
- let accessSize = AccessSz;
- let BaseOpcode = BaseOp#"_AbsSet";
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = dst;
-  let Inst{13} = 0b0;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src;
- let Inst{7} = 0b1;
- let Inst{5-0} = addr;
- }
-
-let mayStore = 1, addrMode = AbsoluteSet in {
- def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>;
- def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>;
- def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>;
-}
-
-let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
- addrMode = BaseLongOffset, AddedComplexity = 40 in
-class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
- bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
- : STInst<(outs),
- (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, RC:$src4),
- mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""),
- []>, ImmRegShl, NewValueRel {
-
- bits<5> src1;
- bits<2> src2;
- bits<6> src3;
- bits<5> src4;
-
- let accessSize = AccessSz;
- let CextOpcode = CextOp;
- let BaseOpcode = CextOp#"_shl";
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} =0b1101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2{1};
- let Inst{12-8} = src4;
- let Inst{7} = 0b1;
- let Inst{6} = src2{0};
- let Inst{5-0} = src3;
-}
-
-def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
-def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010,
- HalfWordAccess>;
-def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011,
- HalfWordAccess, 1>;
-def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>;
-def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110,
- DoubleWordAccess>;
-
-let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset,
- opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in
-class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp,
- MemAccessSize AccessSz>
- : NVInst <(outs ),
- (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, IntRegs:$src4),
- mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel {
- bits<5> src1;
- bits<2> src2;
- bits<6> src3;
- bits<3> src4;
-
- let CextOpcode = CextOp;
- let BaseOpcode = CextOp#"_shl";
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1101101;
- let Inst{7} = 0b1;
- let Inst{20-16} = src1;
- let Inst{13} = src2{1};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src4;
- let Inst{6} = src2{0};
- let Inst{5-0} = src3;
- }
-
-def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>;
-def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>;
-def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Template classes for the non-predicated store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicable = 1 in
-class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH>
- : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt),
- mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
- [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel {
-
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<5> Rt;
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
-
- let IClass = 0b0011;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{4-0} = Rt;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the predicated store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicated = 1 in
-class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit isNot, bit isPredNew, bit isH>
- : STInst <(outs),
- (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt),
-
- !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
- [], "", V4LDST_tc_st_SLOT01> , AddrModeRel{
- bits<2> Pv;
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<5> Rt;
-
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
-
- let IClass = 0b0011;
-
- let Inst{27-26} = 0b01;
- let Inst{25} = isPredNew;
- let Inst{24} = isNot;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{6-5} = Pv;
- let Inst{4-0} = Rt;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the new-value store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, isNewValue = 1, opNewValue = 3 in
-class T_store_new_rr <string mnemonic, bits<2> MajOp> :
- NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt),
- mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new",
- [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel {
-
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<3> Nt;
-
- let IClass = 0b0011;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{4-3} = MajOp;
- let Inst{2-0} = Nt;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the predicated new-value store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, isNewValue = 1, opNewValue = 4 in
-class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew>
- : NVInst<(outs),
- (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt),
- !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new",
- [], "", V4LDST_tc_st_SLOT0>, AddrModeRel {
- bits<2> Pv;
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<3> Nt;
-
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
-
- let IClass = 0b0011;
- let Inst{27-26} = 0b01;
- let Inst{25} = isPredNew;
- let Inst{24} = isNot;
- let Inst{23-21} = 0b101;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{6-5} = Pv;
- let Inst{4-3} = MajOp;
- let Inst{2-0} = Nt;
- }
-
-//===----------------------------------------------------------------------===//
-// multiclass for store instructions with base + register offset addressing
-// mode
-//===----------------------------------------------------------------------===//
-let isNVStorable = 1 in
-multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC,
- bits<3> MajOp, bit isH = 0> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
- def S4_#NAME#_rr : T_store_rr <mnemonic, RC, MajOp, isH>;
-
- // Predicated
- def S4_p#NAME#t_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 0, isH>;
- def S4_p#NAME#f_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 0, isH>;
-
- // Predicated new
- def S4_p#NAME#tnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 1, isH>;
- def S4_p#NAME#fnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 1, isH>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// multiclass for new-value store instructions with base + register offset
-// addressing mode.
-//===----------------------------------------------------------------------===//
-let mayStore = 1, isNVStore = 1 in
-multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC,
- bits<2> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
- def S4_#NAME#new_rr : T_store_new_rr<mnemonic, MajOp>;
-
- // Predicated
- def S4_p#NAME#newt_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 0>;
- def S4_p#NAME#newf_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 0>;
-
- // Predicated new
- def S4_p#NAME#newtnew_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 1>;
- def S4_p#NAME#newfnew_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 1>;
- }
-}
-
-let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in {
- let accessSize = ByteAccess in
- defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>,
- ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>;
-
- let accessSize = HalfWordAccess in
- defm storerh: ST_Idxd_shl<"memh", "STrih", IntRegs, 0b010>,
- ST_Idxd_shl_nv<"memh", "STrih", IntRegs, 0b01>;
-
- let accessSize = WordAccess in
- defm storeri: ST_Idxd_shl<"memw", "STriw", IntRegs, 0b100>,
- ST_Idxd_shl_nv<"memw", "STriw", IntRegs, 0b10>;
-
- let isNVStorable = 0, accessSize = DoubleWordAccess in
- defm storerd: ST_Idxd_shl<"memd", "STrid", DoubleRegs, 0b110>;
-
- let isNVStorable = 0, accessSize = HalfWordAccess in
- defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
- opExtendable = 2 in
-class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp >
- : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8_0Ext:$S8),
- mnemonic#"($Rs+#$offset)=#$S8",
- [], "", V4LDST_tc_st_SLOT01>,
- ImmRegRel, PredNewRel {
- bits<5> Rs;
- bits<8> S8;
- bits<8> offset;
- bits<6> offsetBits;
-
- string OffsetOpStr = !cast<string>(OffsetOp);
- let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
- !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
- /* u6_0Imm */ offset{5-0}));
-
- let IClass = 0b0011;
-
- let Inst{27-25} = 0b110;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-7} = offsetBits;
- let Inst{13} = S8{7};
- let Inst{6-0} = S8{6-0};
- }
-
-let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6,
- opExtendable = 3 in
-class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
- bit isPredNot, bit isPredNew >
- : STInst <(outs ),
- (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6_0Ext:$S6),
- !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($Rs+#$offset)=#$S6",
- [], "", V4LDST_tc_st_SLOT01>,
- ImmRegRel, PredNewRel {
- bits<2> Pv;
- bits<5> Rs;
- bits<6> S6;
- bits<8> offset;
- bits<6> offsetBits;
-
- string OffsetOpStr = !cast<string>(OffsetOp);
- let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
- !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
- /* u6_0Imm */ offset{5-0}));
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b0011;
-
- let Inst{27-25} = 0b100;
- let Inst{24} = isPredNew;
- let Inst{23} = isPredNot;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = S6{5};
- let Inst{12-7} = offsetBits;
- let Inst{6-5} = Pv;
- let Inst{4-0} = S6{4-0};
- }
-
-
-//===----------------------------------------------------------------------===//
-// multiclass for store instructions with base + immediate offset
-// addressing mode and immediate stored value.
-// mem[bhw](Rs+#u6:[012])=#S8
-// if ([!]Pv[.new]) mem[bhw](Rs+#u6:[012])=#S6
-//===----------------------------------------------------------------------===//
-
-multiclass ST_Imm_Pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
- bit PredNot> {
- def _io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 0>;
- // Predicated new
- def new_io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 1>;
-}
-
-multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
- bits<2> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
- def _io : T_StoreImm <mnemonic, OffsetOp, MajOp>;
-
- defm t : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 0>;
- defm f : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 1>;
- }
-}
-
-let hasSideEffects = 0, addrMode = BaseImmOffset,
- InputType = "imm" in {
- let accessSize = ByteAccess in
- defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
-
- let accessSize = HalfWordAccess in
- defm S4_storeirh : ST_Imm<"memh", "STrih", u6_1Imm, 0b01>;
-
- let accessSize = WordAccess in
- defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
-}
-
-//===----------------------------------------------------------------------===//
-// ST -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// NV/ST +
-//===----------------------------------------------------------------------===//
-
-let opNewValue = 2, opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
-class T_store_io_nv <string mnemonic, RegisterClass RC,
- Operand ImmOp, bits<2>MajOp>
- : NVInst_V4 <(outs),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2) = $src3.new",
- [],"",ST_tc_st_SLOT0> {
- bits<5> src1;
- bits<13> src2; // Actual address offset
- bits<3> src3;
- bits<11> offsetBits; // Represents offset encoding
-
- let opExtentBits = !if (!eq(mnemonic, "memb"), 11,
- !if (!eq(mnemonic, "memh"), 12,
- !if (!eq(mnemonic, "memw"), 13, 0)));
-
- let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
- !if (!eq(mnemonic, "memh"), 1,
- !if (!eq(mnemonic, "memw"), 2, 0)));
-
- let offsetBits = !if (!eq(mnemonic, "memb"), src2{10-0},
- !if (!eq(mnemonic, "memh"), src2{11-1},
- !if (!eq(mnemonic, "memw"), src2{12-2}, 0)));
-
- let IClass = 0b1010;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24-21} = 0b1101;
- let Inst{20-16} = src1;
- let Inst{13} = offsetBits{8};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src3;
- let Inst{7-0} = offsetBits{7-0};
- }
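-// For illustration: the !if chain above encodes only the scaled offset,
-// dropping the alignment bits implied by the access size. For example, a
-// store "memw(r1+#40) = r2.new" would encode offsetBits = 40 >> 2 = 10,
-// while "memb" keeps the full byte offset.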
-
-let opExtendable = 2, opNewValue = 3, isPredicated = 1 in
-class T_pstore_io_nv <string mnemonic, RegisterClass RC, Operand predImmOp,
- bits<2>MajOp, bit PredNot, bit isPredNew>
- : NVInst_V4 <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4),
- !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+#$src3) = $src4.new",
- [],"",V2LDST_tc_st_SLOT0> {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3;
- bits<3> src4;
- bits<6> offsetBits; // Represents offset encoding
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = PredNot;
- let opExtentBits = !if (!eq(mnemonic, "memb"), 6,
- !if (!eq(mnemonic, "memh"), 7,
- !if (!eq(mnemonic, "memw"), 8, 0)));
-
- let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
- !if (!eq(mnemonic, "memh"), 1,
- !if (!eq(mnemonic, "memw"), 2, 0)));
-
- let offsetBits = !if (!eq(mnemonic, "memb"), src3{5-0},
- !if (!eq(mnemonic, "memh"), src3{6-1},
- !if (!eq(mnemonic, "memw"), src3{7-2}, 0)));
-
- let IClass = 0b0100;
-
- let Inst{27} = 0b0;
- let Inst{26} = PredNot;
- let Inst{25} = isPredNew;
- let Inst{24-21} = 0b0101;
- let Inst{20-16} = src2;
- let Inst{13} = offsetBits{5};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src4;
- let Inst{7-3} = offsetBits{4-0};
- let Inst{2} = 0b0;
- let Inst{1-0} = src1;
- }
-
-// multiclass for new-value store instructions with base + immediate offset.
-//
-let mayStore = 1, isNVStore = 1, isNewValue = 1, hasSideEffects = 0,
- isExtendable = 1 in
-multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<2> MajOp> {
-
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- def S2_#NAME#new_io : T_store_io_nv <mnemonic, RC, ImmOp, MajOp>;
- // Predicated
- def S2_p#NAME#newt_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 0, 0>;
- def S2_p#NAME#newf_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 1, 0>;
- // Predicated new
- def S4_p#NAME#newtnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
- MajOp, 0, 1>;
- def S4_p#NAME#newfnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
- MajOp, 1, 1>;
- }
-}
-
-let addrMode = BaseImmOffset, InputType = "imm" in {
- let accessSize = ByteAccess in
- defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
- u6_0Ext, 0b00>, AddrModeRel;
-
- let accessSize = HalfWordAccess, opExtentAlign = 1 in
- defm storerh: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
- u6_1Ext, 0b01>, AddrModeRel;
-
- let accessSize = WordAccess, opExtentAlign = 2 in
- defm storeri: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
- u6_2Ext, 0b10>, AddrModeRel;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment loads with register offset.
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1 in
-def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>;
-
-def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>;
-
-let hasSideEffects = 0, addrMode = PostInc in
-class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz>
- : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_),
- (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3),
- "$dst = "#mnemonic#"($src2++$src3)", [],
- "$src1 = $dst, $src2 = $_dst_"> {
- bits<5> dst;
- bits<5> src2;
- bits<1> src3;
-
- let accessSize = AccessSz;
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b110;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = src3;
- let Inst{12} = 0b0;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
-def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>;
-def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated post increment .new stores
-// mem[bhwd](Rx++#s4:[0123])=Nt.new
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
- isNewValue = 1, opNewValue = 3 in
-class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
- : NVInstPI_V4 <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2),
- mnemonic#"($src1++#$offset) = $src2.new",
- [], "$src1 = $_dst_">,
- AddrModeRel {
- bits<5> src1;
- bits<3> src2;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0}));
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = src1;
- let Inst{13} = 0b0;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src2;
- let Inst{7} = 0b0;
- let Inst{6-3} = offsetBits;
- let Inst{1} = 0b0;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated post increment .new stores
-// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
- isNewValue = 1, opNewValue = 4 in
-class T_StorePI_nv_pred <string mnemonic, Operand ImmOp,
- bits<2> MajOp, bit isPredNot, bit isPredNew >
- : NVInstPI_V4 <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2,
- ImmOp:$offset, IntRegs:$src3),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2++#$offset) = $src3.new",
- [], "$src2 = $_dst_">,
- AddrModeRel {
- bits<2> src1;
- bits<5> src2;
- bits<3> src3;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0}));
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = src2;
- let Inst{13} = 0b1;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src3;
- let Inst{7} = isPredNew;
- let Inst{6-3} = offsetBits;
- let Inst{2} = isPredNot;
- let Inst{1-0} = src1;
- }
-
-multiclass ST_PostInc_Pred_nv<string mnemonic, Operand ImmOp,
- bits<2> MajOp, bit PredNot> {
- def _pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 0>;
-
- // Predicated new
- def new_pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 1>;
-}
-
-multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp,
- bits<2> MajOp> {
- let BaseOpcode = "POST_"#BaseOp in {
- def S2_#NAME#_pi : T_StorePI_nv <mnemonic, ImmOp, MajOp>;
-
- // Predicated
- defm S2_p#NAME#t : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 0>;
- defm S2_p#NAME#f : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 1>;
- }
-}
-
-let accessSize = ByteAccess in
-defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>;
-
-let accessSize = HalfWordAccess in
-defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>;
-
-let accessSize = WordAccess in
-defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>;
-
-//===----------------------------------------------------------------------===//
-// Template class for post increment .new stores with register offset
-//===----------------------------------------------------------------------===//
-let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
-class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz>
- : NVInstPI_V4 <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, IntRegs:$src3),
- mnemonic#"($src1++$src2) = $src3.new",
- [], "$src1 = $_dst_"> {
- bits<5> src1;
- bits<1> src2;
- bits<3> src3;
- let accessSize = AccessSz;
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1101101;
- let Inst{20-16} = src1;
- let Inst{13} = src2;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src3;
- let Inst{7} = 0b0;
- }
-
-def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>;
-def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>;
-def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>;
-
-// memb(Rx++#s4:0:circ(Mu))=Nt.new
-// memb(Rx++I:circ(Mu))=Nt.new
-// memb(Rx++Mu:brev)=Nt.new
-// memh(Rx++#s4:1:circ(Mu))=Nt.new
-// memh(Rx++I:circ(Mu))=Nt.new
-// memh(Rx++Mu)=Nt.new
-// memh(Rx++Mu:brev)=Nt.new
-
-// memw(Rx++#s4:2:circ(Mu))=Nt.new
-// memw(Rx++I:circ(Mu))=Nt.new
-// memw(Rx++Mu)=Nt.new
-// memw(Rx++Mu:brev)=Nt.new
-
-//===----------------------------------------------------------------------===//
-// NV/ST -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// NV/J +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// multiclass/template class for the new-value compare jumps with register
-// operands.
-//===----------------------------------------------------------------------===//
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
- opExtentAlign = 2 in
-class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
- bit isNegCond, bit isTak>
- : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
- "if ("#!if(isNegCond, "!","")#mnemonic#
- "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")#
- "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:"
- #!if(isTak, "t","nt")#" $offset", []> {
-
- bits<5> src1;
- bits<5> src2;
- bits<3> Ns; // New-Value Operand
- bits<5> RegOp; // Non-New-Value Operand
- bits<11> offset;
-
- let isTaken = isTak;
- let isPredicatedFalse = isNegCond;
- let opNewValue{0} = NvOpNum;
-
- let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0});
- let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
-
- let IClass = 0b0010;
- let Inst{27-26} = 0b00;
- let Inst{25-23} = majOp;
- let Inst{22} = isNegCond;
- let Inst{18-16} = Ns;
- let Inst{13} = isTak;
- let Inst{12-8} = RegOp;
- let Inst{21-20} = offset{10-9};
- let Inst{7-1} = offset{8-2};
-}
-
-
-multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
- bit isNegCond> {
- // Branch not taken:
- def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
- // Branch taken:
- def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
-}
-
-// NvOpNum = 0 -> the first operand is a new-value register.
-// NvOpNum = 1 -> the second operand is a new-value register.
-
-multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
- bit NvOpNum> {
- let BaseOpcode = BaseOp#_NVJ in {
- defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
- defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
- }
-}
-
-// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2
-// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2
-// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
-
-let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0 in {
- defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
- defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
- defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
- defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
- defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
-}
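-
-// Note that there is no dedicated "lt"/"ltu" compare-jump encoding:
-// J4_cmplt/J4_cmpltu above reuse the cmp.gt/cmp.gtu mnemonics with
-// NvOpNum = 1, so Ns and RegOp are swapped and the printed form is, for
-// example, "if (cmp.gt(Rt, Ns.new)) jump:t #r9:2".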
-
-//===----------------------------------------------------------------------===//
-// multiclass/template class for the new-value compare jump instructions
-// with a register and an unsigned immediate (U5) operand.
-//===----------------------------------------------------------------------===//
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
- opExtentAlign = 2 in
-class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
- bit isTak>
- : NVInst_V4<(outs),
- (ins IntRegs:$src1, u5_0Imm:$src2, brtarget:$offset),
- "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
- #!if(isTak, "t","nt")#" $offset", []> {
-
- let isTaken = isTak;
- let isPredicatedFalse = isNegCond;
-
- bits<3> src1;
- bits<5> src2;
- bits<11> offset;
-
- let IClass = 0b0010;
- let Inst{26} = 0b1;
- let Inst{25-23} = majOp;
- let Inst{22} = isNegCond;
- let Inst{18-16} = src1;
- let Inst{13} = isTak;
- let Inst{12-8} = src2;
- let Inst{21-20} = offset{10-9};
- let Inst{7-1} = offset{8-2};
-}
-
-multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
- // Branch not taken:
- def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>;
- // Branch taken:
- def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>;
-}
-
-multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
- let BaseOpcode = BaseOp#_NVJri in {
- defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
- defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
- }
-}
-
-// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
-// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
-
-let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0 in {
- defm J4_cmpeqi : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
- defm J4_cmpgti : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
- defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
-}
-
-//===----------------------------------------------------------------------===//
-// multiclass/template class for the new-value compare jump instructions
-// with a register and a hardcoded 0/-1 immediate value.
-//===----------------------------------------------------------------------===//
-
-let isExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
- opExtentAlign = 2 in
-class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
- bit isNegCond, bit isTak>
- : NVInst_V4<(outs),
- !if(!eq(ImmVal, "{-1}"),
- (ins IntRegs:$src1, n1Const:$n1, brtarget:$offset),
- (ins IntRegs:$src1, brtarget:$offset)),
- "if ("#!if(isNegCond, "!","")#mnemonic
- #"($src1.new, #" # !if(!eq(ImmVal, "{-1}"), "$n1", ImmVal) # ")) jump:"
- #!if(isTak, "t","nt")#" $offset", []> {
-
- let isTaken = isTak;
- let isPredicatedFalse = isNegCond;
- let opExtendable = !if(!eq(ImmVal, "{-1}"), 2, 1);
-
- bits<3> src1;
- bits<11> offset;
- let IClass = 0b0010;
- let Inst{26} = 0b1;
- let Inst{25-23} = majOp;
- let Inst{22} = isNegCond;
- let Inst{18-16} = src1;
- let Inst{13} = isTak;
- let Inst{21-20} = offset{10-9};
- let Inst{7-1} = offset{8-2};
-}
-
-multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
- bit isNegCond> {
- // Branch not taken:
- def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
- // Branch taken:
- def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
-}
-
-multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
- string ImmVal> {
- let BaseOpcode = BaseOp#_NVJ_ConstImm in {
- defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
- defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
- }
-}
-
-// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2
-// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
-
-let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
- Defs = [PC], hasSideEffects = 0 in {
- defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
- defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "{-1}">, PredRel;
- defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "{-1}">, PredRel;
-}
-
-// J4_hintjumpr: Hint indirect conditional jump.
-let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def J4_hintjumpr: JRInst <
- (outs),
- (ins IntRegs:$Rs),
- "hintjr($Rs)"> {
- bits<5> Rs;
- let IClass = 0b0101;
- let Inst{27-21} = 0b0010101;
- let Inst{20-16} = Rs;
- }
-
-//===----------------------------------------------------------------------===//
-// NV/J -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// CR +
-//===----------------------------------------------------------------------===//
-
-// PC-relative add
-let hasNewValue = 1, isExtendable = 1, opExtendable = 1,
- isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in
-def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6_0Ext:$u6),
- "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > {
- bits<5> Rd;
- bits<6> u6;
-
- let IClass = 0b0110;
- let Inst{27-16} = 0b101001001001;
- let Inst{12-7} = u6;
- let Inst{4-0} = Rd;
- }
-
-
-
-let hasSideEffects = 0 in
-class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg>
- : CRInst<(outs PredRegs:$Pd),
- (ins PredRegs:$Ps, PredRegs:$Pt, PredRegs:$Pu),
- "$Pd = " # MnOp1 # "($Ps, " # MnOp2 # "($Pt, " #
- !if (IsNeg,"!","") # "$Pu))",
- [], "", CR_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<2> Ps;
- bits<2> Pt;
- bits<2> Pu;
-
- let IClass = 0b0110;
- let Inst{27-24} = 0b1011;
- let Inst{23} = IsNeg;
- let Inst{22-21} = OpBits;
- let Inst{20} = 0b1;
- let Inst{17-16} = Ps;
- let Inst{13} = 0b0;
- let Inst{9-8} = Pt;
- let Inst{7-6} = Pu;
- let Inst{1-0} = Pd;
-}
-
-def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>;
-def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>;
-def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>;
-def C4_or_or : T_LOGICAL_3OP<"or", "or", 0b11, 0>;
-def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>;
-def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>;
-def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
-def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
-
-//===----------------------------------------------------------------------===//
-// CR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/ALU +
-//===----------------------------------------------------------------------===//
-
-// Logical with-not instructions.
-def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
-def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
-
-let hasNewValue = 1, hasSideEffects = 0 in
-def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0101111;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-// Add and accumulate.
-// Rd=add(Rs,add(Ru,#s6))
-let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6,
- opExtendable = 3 in
-def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Ru, s6_0Ext:$s6),
- "$Rd = add($Rs, add($Ru, #$s6))" , [],
- "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Ru;
- bits<6> s6;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b10110;
- let Inst{22-21} = s6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = s6{3};
- let Inst{12-8} = Rd;
- let Inst{7-5} = s6{2-0};
- let Inst{4-0} = Ru;
- }
-
-let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 6, opExtendable = 2 in
-def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s6_0Ext:$s6, IntRegs:$Ru),
- "$Rd = add($Rs, sub(#$s6, $Ru))",
- [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<6> s6;
- bits<5> Ru;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b10111;
- let Inst{22-21} = s6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = s6{3};
- let Inst{12-8} = Rd;
- let Inst{7-5} = s6{2-0};
- let Inst{4-0} = Ru;
- }
-
-def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
-def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6_0Imm>;
-
-let hasNewValue = 1 in {
- def S4_extract_rp : T_S3op_extract<"extract", 0b01>;
- def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5_0Imm>;
-}
-
-// Complex add/sub halfwords/words
-let Defs = [USR_OVF] in {
- def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>;
- def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>;
- def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>;
- def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>;
-}
-
-let Defs = [USR_OVF] in {
- def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>;
- def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>;
-}
-
-let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in {
- def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>;
- def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>;
-}
-
-// Logical xor with xor accumulation.
-// Rxx^=xor(Rss,Rtt)
-let hasSideEffects = 0 in
-def M4_xor_xacc
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx ^= xor($Rss, $Rtt)", [],
- "$dst2 = $Rxx", S_3op_tc_1_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b101010;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rxx;
- }
-
-// Rotate and reduce bytes
-// Rdd=vrcrotate(Rss,Rt,#u2)
-let hasSideEffects = 0 in
-def S4_vrcrotate
- : SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2),
- "$Rdd = vrcrotate($Rss, $Rt, #$u2)",
- [], "", S_3op_tc_3x_SLOT23> {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rt;
- bits<2> u2;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b001111;
- let Inst{20-16} = Rss;
- let Inst{13} = u2{1};
- let Inst{12-8} = Rt;
- let Inst{7-6} = 0b11;
- let Inst{5} = u2{0};
- let Inst{4-0} = Rdd;
- }
-
-// Rotate and reduce bytes with accumulation
-// Rxx+=vrcrotate(Rss,Rt,#u2)
-let hasSideEffects = 0 in
-def S4_vrcrotate_acc
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2),
- "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [],
- "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rt;
- bits<2> u2;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = Rss;
- let Inst{13} = u2{1};
- let Inst{12-8} = Rt;
- let Inst{5} = u2{0};
- let Inst{4-0} = Rxx;
- }
-
-// Vector reduce conditional negate halfwords
-let hasSideEffects = 0 in
-def S2_vrcnegh
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rxx += vrcnegh($Rss, $Rt)", [],
- "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b1011001;
- let Inst{20-16} = Rss;
- let Inst{13} = 0b1;
- let Inst{12-8} = Rt;
- let Inst{7-5} = 0b111;
- let Inst{4-0} = Rxx;
- }
-
-// Split bitfield
-def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>;
-
-// Arithmetic/Convergent round
-def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>;
-
-def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>;
-
-let Defs = [USR_OVF] in
-def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>;
-
-// Logical-logical words.
-// Compound or-and -- Rx=or(Ru,and(Rx,#s10))
-let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10,
- opExtendable = 3 in
-def S4_or_andix:
- ALU64Inst<(outs IntRegs:$Rx),
- (ins IntRegs:$Ru, IntRegs:$_src_, s10_0Ext:$s10),
- "$Rx = or($Ru, and($_src_, #$s10))" , [] ,
- "$_src_ = $Rx", ALU64_tc_2_SLOT23> {
- bits<5> Rx;
- bits<5> Ru;
- bits<10> s10;
-
- let IClass = 0b1101;
-
- let Inst{27-22} = 0b101001;
- let Inst{20-16} = Rx;
- let Inst{21} = s10{9};
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Ru;
- }
-
-// Miscellaneous ALU64 instructions.
-//
-let hasNewValue = 1, hasSideEffects = 0 in
-def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0011111;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = 0b111;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0 in
-def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0100;
- let Inst{21} = 0b1;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0 in
-def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0100;
- let Inst{21} = 0b0;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0101100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b0;
- let Inst{4-0} = Rd;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0101100;
- let Inst{20-16} = Rt;
- let Inst{12-8} = Rs;
- let Inst{7} = 0b1;
- let Inst{4-0} = Rd;
-}
-
-// Rx[&|]=xor(Rs,Rt)
-def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>;
-def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>;
-
-// Rx[&|^]=or(Rs,Rt)
-def M4_xor_or : T_MType_acc_rr < "^= or", 0b110, 0b011, 0>;
-
-let CextOpcode = "ORr_ORr" in
-def M4_or_or : T_MType_acc_rr < "|= or", 0b110, 0b000, 0>;
-def M4_and_or : T_MType_acc_rr < "&= or", 0b010, 0b001, 0>;
-
-// Rx[&|^]=and(Rs,Rt)
-def M4_xor_and : T_MType_acc_rr < "^= and", 0b110, 0b010, 0>;
-
-let CextOpcode = "ORr_ANDr" in
-def M4_or_and : T_MType_acc_rr < "|= and", 0b010, 0b011, 0>;
-def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>;
-
-// Rx[&|^]=and(Rs,~Rt)
-def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>;
-def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
-def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
-
-// Compound or-or and or-and
-let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
- opExtentBits = 10, opExtendable = 3 in
-class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
- : MInst_acc <(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, s10_0Ext:$s10),
- "$Rx |= "#mnemonic#"($Rs, #$s10)", [],
- "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
- bits<5> Rx;
- bits<5> Rs;
- bits<10> s10;
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-22} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{21} = s10{9};
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Rx;
- }
-
-let CextOpcode = "ORr_ANDr" in
-def S4_or_andi : T_CompOR <"and", 0b00, and>;
-
-let CextOpcode = "ORr_ORr" in
-def S4_or_ori : T_CompOR <"or", 0b10, or>;
-
-// Modulo wrap
-// Rd=modwrap(Rs,Rt)
-// Round
-// Rd=cround(Rs,#u5)
-// Rd=cround(Rs,Rt)
-// Rd=round(Rs,#u5)[:sat]
-// Rd=round(Rs,Rt)[:sat]
-// Vector reduce add unsigned halfwords
-// Rd=vraddh(Rss,Rtt)
-// Vector add bytes
-// Rdd=vaddb(Rss,Rtt)
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-// Rxx+=vrcnegh(Rss,Rt)
-// Vector maximum bytes
-// Rdd=vmaxb(Rtt,Rss)
-// Vector reduce maximum halfwords
-// Rxx=vrmaxh(Rss,Ru)
-// Rxx=vrmaxuh(Rss,Ru)
-// Vector reduce maximum words
-// Rxx=vrmaxuw(Rss,Ru)
-// Rxx=vrmaxw(Rss,Ru)
-// Vector minimum bytes
-// Rdd=vminb(Rtt,Rss)
-// Vector reduce minimum halfwords
-// Rxx=vrminh(Rss,Ru)
-// Rxx=vrminuh(Rss,Ru)
-// Vector reduce minimum words
-// Rxx=vrminuw(Rss,Ru)
-// Rxx=vrminw(Rss,Ru)
-// Vector subtract bytes
-// Rdd=vsubb(Rss,Rtt)
-
-//===----------------------------------------------------------------------===//
-// XTYPE/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/BIT +
-//===----------------------------------------------------------------------===//
-
-// Bit reverse
-def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>;
-
-// Bit count
-def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>;
-def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
-def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6_0Imm:$s6),
- "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
- bits<5> Rs;
- bits<5> Rd;
- bits<6> s6;
- let IClass = 0b1000;
- let Inst{27-24} = 0b1100;
- let Inst{23-21} = 0b001;
- let Inst{20-16} = Rs;
- let Inst{13-8} = s6;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6_0Imm:$s6),
- "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
- bits<5> Rs;
- bits<5> Rd;
- bits<6> s6;
- let IClass = 0b1000;
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = 0b011;
- let Inst{20-16} = Rs;
- let Inst{13-8} = s6;
- let Inst{7-5} = 0b010;
- let Inst{4-0} = Rd;
-}
-
-
-// Bit test/set/clear
-def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
-def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
-
-def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
-def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
-def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/BIT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/MPY +
-//===----------------------------------------------------------------------===//
-
-// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immediate and add immediate.
-
-let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
-def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
- (ins u6_0Ext:$u6, IntRegs:$Rs, u6_0Imm:$U6),
- "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [],"",ALU64_tc_3x_SLOT23> {
- bits<5> Rd;
- bits<6> u6;
- bits<5> Rs;
- bits<6> U6;
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b1000;
- let Inst{23} = U6{5};
- let Inst{22-21} = u6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = u6{3};
- let Inst{12-8} = Rd;
- let Inst{7-5} = u6{2-0};
- let Inst{4-0} = U6{4-0};
- }
-
-// Rd=add(#u6,mpyi(Rs,Rt))
-let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1,
- isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
-def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
- (ins u6_0Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [], "", ALU64_tc_3x_SLOT23>, ImmRegRel {
- bits<5> Rd;
- bits<6> u6;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b01110;
- let Inst{22-21} = u6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = u6{3};
- let Inst{12-8} = Rt;
- let Inst{7-5} = u6{2-0};
- let Inst{4-0} = Rd;
- }
-
-let hasNewValue = 1 in
-class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
- : ALU64Inst <(outs IntRegs:$dst), ins,
- "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))",
- "#$src2, $src3))"), [],
- "", ALU64_tc_3x_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<8> src2;
- bits<5> src3;
-
- let IClass = 0b1101;
-
- bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2});
-
- let Inst{27-24} = 0b1111;
- let Inst{23} = MajOp;
- let Inst{22-21} = ImmValue{5-4};
- let Inst{20-16} = src3;
- let Inst{13} = ImmValue{3};
- let Inst{12-8} = dst;
- let Inst{7-5} = ImmValue{2-0};
- let Inst{4-0} = src1;
- }
-
-def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
- (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>;
-
-let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
- CextOpcode = "ADD_MPY", InputType = "imm" in
-def M4_mpyri_addr : T_AddMpy<0b1, u32_0ImmPred,
- (ins IntRegs:$src1, IntRegs:$src3, u6_0Ext:$src2)>, ImmRegRel;
-
-// Rx=add(Ru,mpyi(Rx,Rs))
-let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in
-def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
- (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
- "$Rx = add($Ru, mpyi($_src_, $Rs))", [],
- "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel {
- bits<5> Rx;
- bits<5> Ru;
- bits<5> Rs;
-
- let IClass = 0b1110;
-
- let Inst{27-21} = 0b0011000;
- let Inst{12-8} = Rx;
- let Inst{4-0} = Ru;
- let Inst{20-16} = Rs;
- }
-
-
-// Vector reduce multiply word by signed half (32x16)
-//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>;
-def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>;
-def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>;
-
-//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
-def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>;
-def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>;
-
-//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
-def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>;
-def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>;
-
-// Vector multiply halfwords, signed by unsigned
-// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
-def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>;
-def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>;
-
-// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
-def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>;
-def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>;
-
-// Vector polynomial multiply halfwords
-// Rdd=vpmpyh(Rs,Rt)
-def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>;
-
-// Rxx^=vpmpyh(Rs,Rt)
-def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>;
-
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>;
-
-// Rxx^=pmpyw(Rs,Rt)
-def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/MPY -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/Vector compare
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Template class for vector compare
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0 in
-class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd>
- : ALU64_rr <(outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, ImmOprnd:$Imm),
- "$Pd = "#Str#"($Rss, #$Imm)",
- [], "", ALU64_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rss;
- bits<32> Imm;
- bits<8> ImmBits;
- let ImmBits{6-0} = Imm{6-0};
- let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b1100;
- let Inst{22-21} = cmpOp;
- let Inst{20-16} = Rss;
- let Inst{12-5} = ImmBits;
- let Inst{4-3} = minOp;
- let Inst{1-0} = Pd;
- }
-
-// Vector compare bytes
-def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>;
-
-let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in
-def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>;
-
-def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8_0Imm>;
-def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8_0Imm>;
-def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7_0Imm>;
-
-// Vector compare halfwords
-def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8_0Imm>;
-def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8_0Imm>;
-def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7_0Imm>;
-
-// Vector compare words
-def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8_0Imm>;
-def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8_0Imm>;
-def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7_0Imm>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/SHIFT +
-//===----------------------------------------------------------------------===//
-// Shift by immediate and accumulate/logical.
-// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5))
-// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5))
-// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5))
-// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
- hasNewValue = 1, opNewValue = 0 in
-class T_S4_ShiftOperate<string MnOp, string MnSh, bit asl_lsr,
- bits<2> MajOp, InstrItinClass Itin>
- : MInst_acc<(outs IntRegs:$Rd), (ins u8_0Ext:$u8, IntRegs:$Rx, u5_0Imm:$U5),
- "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
- [], "$Rd = $Rx", Itin> {
-
- bits<5> Rd;
- bits<8> u8;
- bits<5> Rx;
- bits<5> U5;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b1110;
- let Inst{23-21} = u8{7-5};
- let Inst{20-16} = Rd;
- let Inst{13} = u8{4};
- let Inst{12-8} = U5;
- let Inst{7-5} = u8{3-1};
- let Inst{4} = asl_lsr;
- let Inst{3} = u8{0};
- let Inst{2-1} = MajOp;
-}
-
-multiclass T_ShiftOperate<string mnemonic, bits<2> MajOp, InstrItinClass Itin> {
- def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", 0, MajOp, Itin>;
- def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", 1, MajOp, Itin>;
-}
-
-defm S4_addi : T_ShiftOperate<"add", 0b10, ALU64_tc_2_SLOT23>;
-defm S4_andi : T_ShiftOperate<"and", 0b00, ALU64_tc_2_SLOT23>;
-defm S4_ori : T_ShiftOperate<"or", 0b01, ALU64_tc_1_SLOT23>;
-defm S4_subi : T_ShiftOperate<"sub", 0b11, ALU64_tc_1_SLOT23>;
-
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
-def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>;
-
-// Rd=[cround|round](Rs,Rt)
-let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in {
- def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>;
- def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>;
-}
-
-// Rd=round(Rs,Rt):sat
-let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
-def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>;
-
-// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat
-let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
- def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>;
- def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>;
-}
-
-// Rdd=[add|sub](Rss,Rtt,Px):carry
-let isPredicateLate = 1, hasSideEffects = 0 in
-class T_S3op_carry <string mnemonic, bits<3> MajOp>
- : SInst < (outs DoubleRegs:$Rdd, PredRegs:$Px),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
- "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu):carry",
- [], "$Px = $Pu", S_3op_tc_1_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
- bits<2> Pu;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rdd;
- }
-
-def A4_addp_c : T_S3op_carry < "add", 0b110 >;
-def A4_subp_c : T_S3op_carry < "sub", 0b111 >;
-
-let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in
-class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned>
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru),
- "$Rxx = "#mnemonic#"($Rss, $Ru)" ,
- [] , "$dst2 = $Rxx"> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Ru;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b1011001;
- let Inst{20-16} = Rss;
- let Inst{13} = isUnsigned;
- let Inst{12-8} = Rxx;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Ru;
- }
-
-// Vector reduce maximum halfwords
-// Rxx=vrmax[u]h(Rss,Ru)
-def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>;
-def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>;
-
-// Vector reduce maximum words
-// Rxx=vrmax[u]w(Rss,Ru)
-def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>;
-def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>;
-
-// Vector reduce minimum halfwords
-// Rxx=vrmin[u]h(Rss,Ru)
-def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>;
-def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>;
-
-// Vector reduce minimum words
-// Rxx=vrmin[u]w(Rss,Ru)
-def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>;
-def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>;
-
-// Shift an immediate left by register amount.
-let hasNewValue = 1, hasSideEffects = 0 in
-def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6_0Imm:$s6, IntRegs:$Rt),
- "$Rd = lsl(#$s6, $Rt)" , [], "", S_3op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<6> s6;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b011010;
- let Inst{20-16} = s6{5-1};
- let Inst{12-8} = Rt;
- let Inst{7-6} = 0b11;
- let Inst{4-0} = Rd;
- let Inst{5} = s6{0};
- }
-
-//===----------------------------------------------------------------------===//
-// XTYPE/SHIFT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MEMOP
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// Template class for MemOp instructions with the register value.
-//===----------------------------------------------------------------------===//
-class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
- string memOp, bits<2> memOpBits> :
- MEMInst_V4<(outs),
- (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
- opc#"($base+#$offset)"#memOp#"$delta",
- []>,
- Requires<[UseMEMOP]> {
-
- bits<5> base;
- bits<5> delta;
- bits<32> offset;
- bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
-
- let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
- !if (!eq(opcBits, 0b01), offset{6-1},
- !if (!eq(opcBits, 0b10), offset{7-2},0)));
-
- let opExtentAlign = opcBits;
- let IClass = 0b0011;
- let Inst{27-24} = 0b1110;
- let Inst{22-21} = opcBits;
- let Inst{20-16} = base;
- let Inst{13} = 0b0;
- let Inst{12-7} = offsetBits;
- let Inst{6-5} = memOpBits;
- let Inst{4-0} = delta;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for MemOp instructions with the immediate value.
-//===----------------------------------------------------------------------===//
-class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
- string memOp, bits<2> memOpBits> :
- MEMInst_V4 <(outs),
- (ins IntRegs:$base, ImmOp:$offset, u5_0Imm:$delta),
- opc#"($base+#$offset)"#memOp#"#$delta"
- #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
- []>,
- Requires<[UseMEMOP]> {
-
- bits<5> base;
- bits<5> delta;
- bits<32> offset;
- bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
-
- let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
- !if (!eq(opcBits, 0b01), offset{6-1},
- !if (!eq(opcBits, 0b10), offset{7-2},0)));
-
- let opExtentAlign = opcBits;
- let IClass = 0b0011;
- let Inst{27-24} = 0b1111;
- let Inst{22-21} = opcBits;
- let Inst{20-16} = base;
- let Inst{13} = 0b0;
- let Inst{12-7} = offsetBits;
- let Inst{6-5} = memOpBits;
- let Inst{4-0} = delta;
-}
-
-// multiclass to define MemOp instructions with a register operand.
-multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
- def L4_add#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
- def L4_sub#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
- def L4_and#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
- def L4_or#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
-}
-
-// multiclass to define MemOp instructions with an immediate operand.
-multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
- def L4_iadd#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
- def L4_isub#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
- def L4_iand#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = clrbit(", 0b10>;
- def L4_ior#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = setbit(", 0b11>;
-}
-
-multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
- defm _#NAME : MemOp_rr <opc, opcBits, ImmOp>;
- defm _#NAME : MemOp_ri <opc, opcBits, ImmOp>;
-}
-
-// Define MemOp instructions.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
- let opExtentBits = 6, accessSize = ByteAccess in
- defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>;
-
- let opExtentBits = 7, accessSize = HalfWordAccess in
- defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>;
-
- let opExtentBits = 8, accessSize = WordAccess in
- defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>;
-}
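-
-// For illustration, each defm above expands into the add/sub/and/or register
-// forms and the iadd/isub/clrbit/setbit immediate forms; for words this
-// yields, e.g., L4_add_memopw_io ("memw(Rs+#u6:2) += Rt") and
-// L4_iand_memopw_io ("memw(Rs+#u6:2) = clrbit(#U5)").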
-
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PRED +
-//===----------------------------------------------------------------------===//
-
-// Hexagon V4 only supports these flavors of byte/half compare instructions:
-// EQ/GT/GTU. The other flavors (GE/GEU/LT/LTU/LE/LEU) have no hardware
-// support, but the compiler can still cover them by combining the supported
-// compares, e.g. by swapping operands or negating the result.
-// Implemented patterns: EQ/GT/GTU.
-// Missing patterns: GE/GEU/LT/LTU/LE/LEU.
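-// As a rough sketch (assuming the C2_cmpgt/C2_not definitions from the
-// surrounding Hexagon .td files, and not taken from this file), word-sized
-// LT and LE could be covered by swapping operands or negating the result:
-//   def: Pat<(i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
-//            (C2_cmpgt IntRegs:$Rt, IntRegs:$Rs)>;
-//   def: Pat<(i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
-//            (C2_not (C2_cmpgt IntRegs:$Rs, IntRegs:$Rt))>;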
-
-// The following instruction is not marked extendable because extending it
-// would produce incorrect code for negative numbers.
-// Pd=cmpb.eq(Rs,#u8)
-
-// p=!cmp.eq(r1,#s10)
-def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10_0Ext>;
-def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10_0Ext>;
-def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9_0Ext>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PRED -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Multiclass for DeallocReturn
-//===----------------------------------------------------------------------===//
-class L4_RETURN<string mnemonic, bit isNot, bit isPredNew, bit isTak>
- : LD0Inst<(outs), (ins PredRegs:$src),
- !if(isNot, "if (!$src", "if ($src")#
- !if(isPredNew, ".new) ", ") ")#mnemonic#
- !if(isPredNew, !if(isTak,":t", ":nt"), ""),
- [], "", LD_tc_3or4stall_SLOT0> {
-
- bits<2> src;
- let BaseOpcode = "L4_RETURN";
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
- let isTaken = isTak;
- let IClass = 0b1001;
-
- let Inst{27-16} = 0b011000011110;
-
- let Inst{13} = isNot;
- let Inst{12} = isTak;
- let Inst{11} = isPredNew;
- let Inst{10} = 0b0;
- let Inst{9-8} = src;
- let Inst{4-0} = 0b11110;
- }
-
-// Produce all predicated forms, p, !p, p.new, !p.new, :t, :nt
-multiclass L4_RETURN_PRED<string mnemonic, bit PredNot> {
- let isPredicated = 1 in {
- def _#NAME# : L4_RETURN <mnemonic, PredNot, 0, 1>;
- def _#NAME#new_pnt : L4_RETURN <mnemonic, PredNot, 1, 0>;
- def _#NAME#new_pt : L4_RETURN <mnemonic, PredNot, 1, 1>;
- }
-}
-
-multiclass LD_MISC_L4_RETURN<string mnemonic> {
- let isBarrier = 1, isPredicable = 1 in
- def NAME : LD0Inst <(outs), (ins), mnemonic, [], "",
- LD_tc_3or4stall_SLOT0> {
- let BaseOpcode = "L4_RETURN";
- let IClass = 0b1001;
- let Inst{27-16} = 0b011000011110;
- let Inst{13-10} = 0b0000;
- let Inst{4-0} = 0b11110;
- }
- defm t : L4_RETURN_PRED<mnemonic, 0 >;
- defm f : L4_RETURN_PRED<mnemonic, 1 >;
-}
-
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in
-defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
-
-// Function call that restores registers and performs dealloc_return.
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">;
-
- let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
- def RESTORE_DEALLOC_RET_JMP_V4_PIC : T_JMP<"">;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC : T_JMP<"">;
- }
-}
-
-// Restore registers and dealloc frame before a tail call.
-let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<0, "">, PredRel;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC : T_Call<0, "">, PredRel;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC : T_Call<0, "">, PredRel;
- }
-}
-
-// Function call that saves registers.
-let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
- def SAVE_REGISTERS_CALL_V4 : T_Call<0, "">, PredRel;
-
- let isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4_EXT : T_Call<0, "">, PredRel;
-
- let Defs = [P0] in
- def SAVE_REGISTERS_CALL_V4STK : T_Call<0, "">, PredRel;
-
- let Defs = [P0], isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4STK_EXT : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28] in
- def SAVE_REGISTERS_CALL_V4_PIC : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28], isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4_EXT_PIC : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28, P0] in
- def SAVE_REGISTERS_CALL_V4STK_PIC : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28, P0], isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<0, "">, PredRel;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated store instructions with
-// GP-Relative or absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicable = 1 in
-class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<2>MajOp, bit isAbs, bit isHalf>
- : STInst<(outs), (ins ImmOp:$addr, RC:$src),
- mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""),
- [], "", V2LDST_tc_st_SLOT01> {
- bits<19> addr;
- bits<5> src;
- bits<16> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
- !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
- !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
- /* u16_0Imm */ addr{15-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
- let Uses = !if (isAbs, [], [GP]);
-
- let IClass = 0b0100;
- let Inst{27} = 1;
- let Inst{26-25} = offsetBits{15-14};
- let Inst{24} = 0b0;
- let Inst{23-22} = MajOp;
- let Inst{21} = isHalf;
- let Inst{20-16} = offsetBits{13-9};
- let Inst{13} = offsetBits{8};
- let Inst{12-8} = src;
- let Inst{7-0} = offsetBits{7-0};
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated store instructions with
-// GP-Relative or absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in
-class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
- bit isHalf, bit isNot, bit isNew>
- : STInst<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, RC: $src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
- ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
- [], "", ST_tc_st_SLOT01>, AddrModeRel {
- bits<2> src1;
- bits<6> absaddr;
- bits<5> src2;
-
- let isPredicatedNew = isNew;
- let isPredicatedFalse = isNot;
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-22} = MajOp;
- let Inst{21} = isHalf;
- let Inst{17-16} = absaddr{5-4};
- let Inst{13} = isNew;
- let Inst{12-8} = src2;
- let Inst{7} = 0b1;
- let Inst{6-3} = absaddr{3-0};
- let Inst{2} = isNot;
- let Inst{1-0} = src1;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated store instructions with absolute addressing.
-//===----------------------------------------------------------------------===//
-class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, u32_0MustExt, MajOp, 1, isHalf>,
- AddrModeRel {
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
- !if (!eq(ImmOpStr, "u16_2Imm"), 18,
- !if (!eq(ImmOpStr, "u16_1Imm"), 17,
- /* u16_0Imm */ 16)));
-
- let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
- !if (!eq(ImmOpStr, "u16_2Imm"), 2,
- !if (!eq(ImmOpStr, "u16_1Imm"), 1,
- /* u16_0Imm */ 0)));
-}
-
-//===----------------------------------------------------------------------===//
-// Multiclass for store instructions with absolute addressing.
-//===----------------------------------------------------------------------===//
-let addrMode = Absolute, isExtended = 1 in
-multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, bits<2> MajOp, bit isHalf = 0> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 0, isPredicable = 1 in
- def PS_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>;
-
- // Predicated
- def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>;
- def S4_p#NAME#f_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 0>;
-
- // .new Predicated
- def S4_p#NAME#tnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 1>;
- def S4_p#NAME#fnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 1>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated new-value store instructions with
-// GP-Relative or absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
- isNewValue = 1, opNewValue = 1 in
-class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp>
- : NVInst_V4<(outs), (ins ImmOp:$addr, IntRegs:$src),
- mnemonic #"(#$addr) = $src.new",
- [], "", V2LDST_tc_st_SLOT0> {
- bits<19> addr;
- bits<3> src;
- bits<16> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
- !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
- !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
- /* u16_0Imm */ addr{15-0})));
- let IClass = 0b0100;
-
- let Inst{27} = 1;
- let Inst{26-25} = offsetBits{15-14};
- let Inst{24-21} = 0b0101;
- let Inst{20-16} = offsetBits{13-9};
- let Inst{13} = offsetBits{8};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src;
- let Inst{7-0} = offsetBits{7-0};
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated new-value store instructions with
-// absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1,
- isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in
-class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew>
- : NVInst_V4<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, IntRegs:$src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
- ") ")#mnemonic#"(#$absaddr) = $src2.new",
- [], "", ST_tc_st_SLOT0>, AddrModeRel {
- bits<2> src1;
- bits<6> absaddr;
- bits<3> src2;
-
- let isPredicatedNew = isNew;
- let isPredicatedFalse = isNot;
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = 0b101;
- let Inst{17-16} = absaddr{5-4};
- let Inst{13} = isNew;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src2;
- let Inst{7} = 0b1;
- let Inst{6-3} = absaddr{3-0};
- let Inst{2} = isNot;
- let Inst{1-0} = src1;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated new-value store instructions with
-// absolute addressing.
-//===----------------------------------------------------------------------===//
-class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp>
- : T_StoreAbsGP_NV <mnemonic, u32_0MustExt, MajOp>, AddrModeRel {
-
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
- !if (!eq(ImmOpStr, "u16_2Imm"), 18,
- !if (!eq(ImmOpStr, "u16_1Imm"), 17,
- /* u16_0Imm */ 16)));
-
- let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
- !if (!eq(ImmOpStr, "u16_2Imm"), 2,
- !if (!eq(ImmOpStr, "u16_1Imm"), 1,
- /* u16_0Imm */ 0)));
-}
-
-//===----------------------------------------------------------------------===//
-// Multiclass for new-value store instructions with absolute addressing.
-//===----------------------------------------------------------------------===//
-let addrMode = Absolute, isExtended = 1 in
-multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
- bits<2> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 0, isPredicable = 1 in
- def PS_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>;
-
- // Predicated
- def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>;
- def S4_p#NAME#newf_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 0>;
-
- // .new Predicated
- def S4_p#NAME#newtnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 1>;
- def S4_p#NAME#newfnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 1>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Stores with absolute addressing
-//===----------------------------------------------------------------------===//
-let accessSize = ByteAccess in
-defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>,
- ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>;
-
-let accessSize = HalfWordAccess in
-defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>,
- ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>;
-
-let accessSize = WordAccess in
-defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>,
- ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>;
-
-let isNVStorable = 0, accessSize = DoubleWordAccess in
-defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>;
-
-let isNVStorable = 0, accessSize = HalfWordAccess in
-defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
-
-//===----------------------------------------------------------------------===//
-// GP-relative stores.
-// mem[bhwd](#global)=Rt
-// Once predicated, these instructions map to absolute addressing mode.
-// if ([!]Pv[.new]) mem[bhwd](##global)=Rt
-//===----------------------------------------------------------------------===//
-
-let Uses = [GP], isAsmParserOnly = 1 in
-class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> {
- // Set BaseOpcode to the same value as the absolute-addressing instructions
- // so that non-predicated GP-relative instructions can be related to the
- // predicated absolute-addressing instructions.
- let BaseOpcode = BaseOp#_abs;
- }
-
-let Uses = [GP], isAsmParserOnly = 1 in
-multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
- bits<2> MajOp, bit isHalf = 0> {
- // Set BaseOpcode to the same value as the absolute-addressing instructions
- // so that non-predicated GP-relative instructions can be related to the
- // predicated absolute-addressing instructions.
- let BaseOpcode = BaseOp#_abs in {
- def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp,
- 0, isHalf>;
- // New-value store
- def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp> ;
- }
-}
-
-let accessSize = ByteAccess in
-defm S2_storerb : ST_GP<"memb", "STrib", u16_0Imm, 0b00>, NewValueRel;
-
-let accessSize = HalfWordAccess in
-defm S2_storerh : ST_GP<"memh", "STrih", u16_1Imm, 0b01>, NewValueRel;
-
-let accessSize = WordAccess in
-defm S2_storeri : ST_GP<"memw", "STriw", u16_2Imm, 0b10>, NewValueRel;
-
-let isNVStorable = 0, accessSize = DoubleWordAccess in
-def S2_storerdgp : T_StoreGP <"memd", "STrid", DoubleRegs,
- u16_3Imm, 0b11>, PredNewRel;
-
-let isNVStorable = 0, accessSize = HalfWordAccess in
-def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
- u16_1Imm, 0b01, 1>, PredNewRel;
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated load instructions with
-// absolute addressing mode.
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0 in
-class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp>
- : LDInst <(outs RC:$dst), (ins ImmOp:$addr),
- "$dst = "#mnemonic# "(#$addr)",
- [], "", V2LDST_tc_ld_SLOT01> {
- bits<5> dst;
- bits<19> addr;
- bits<16> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
- !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
- !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
- /* u16_0Imm */ addr{15-0})));
-
- let IClass = 0b0100;
-
- let Inst{27} = 0b1;
- let Inst{26-25} = offsetBits{15-14};
- let Inst{24} = 0b1;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = offsetBits{13-9};
- let Inst{13-5} = offsetBits{8-0};
- let Inst{4-0} = dst;
- }
-
-class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, u32_0MustExt, MajOp>, AddrModeRel {
-
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
- !if (!eq(ImmOpStr, "u16_2Imm"), 18,
- !if (!eq(ImmOpStr, "u16_1Imm"), 17,
- /* u16_0Imm */ 16)));
-
- let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
- !if (!eq(ImmOpStr, "u16_2Imm"), 2,
- !if (!eq(ImmOpStr, "u16_1Imm"), 1,
- /* u16_0Imm */ 0)));
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated load instructions with
-// absolute addressing mode.
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6,
- opExtendable = 2 in
-class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit isPredNot, bit isPredNew>
- : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32_0MustExt:$absaddr),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel {
- bits<5> dst;
- bits<2> src1;
- bits<6> absaddr;
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = absaddr{5-1};
- let Inst{13} = 0b1;
- let Inst{12} = isPredNew;
- let Inst{11} = isPredNot;
- let Inst{10-9} = src1;
- let Inst{8} = absaddr{0};
- let Inst{7} = 0b1;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Multiclass for the load instructions with absolute addressing mode.
-//===----------------------------------------------------------------------===//
-multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit PredNot> {
- def _abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 0>;
- // Predicate new
- def new_abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 1>;
-}
-
-let addrMode = Absolute, isExtended = 1 in
-multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, bits<3> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 1, isPredicable = 1 in
- def PS_#NAME#abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>;
-
- // Predicated
- defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>;
- defm L4_p#NAME#f : LD_Abs_Pred<mnemonic, RC, MajOp, 1>;
- }
-}
-
-let accessSize = ByteAccess, hasNewValue = 1 in {
- defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
- defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
-}
-
-let accessSize = HalfWordAccess, hasNewValue = 1 in {
- defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
- defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
-}
-
-let accessSize = WordAccess, hasNewValue = 1 in
-defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
-
-let accessSize = DoubleWordAccess in
-defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass for load instructions with GP-relative addressing mode.
-// Rx=mem[bhwd](##global)
-// Once predicated, these instructions map to absolute addressing mode.
-// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
-//===----------------------------------------------------------------------===//
-
-let isAsmParserOnly = 1, Uses = [GP] in
-class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel {
- let BaseOpcode = BaseOp#_abs;
- }
-
-let accessSize = ByteAccess, hasNewValue = 1 in {
- def L2_loadrbgp : T_LoadGP<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
- def L2_loadrubgp : T_LoadGP<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
-}
-
-let accessSize = HalfWordAccess, hasNewValue = 1 in {
- def L2_loadrhgp : T_LoadGP<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
- def L2_loadruhgp : T_LoadGP<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
-}
-
-let accessSize = WordAccess, hasNewValue = 1 in
-def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
-
-let accessSize = DoubleWordAccess in
-def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-
-//===----------------------------------------------------------------------===//
-// :raw form of boundscheck:hi:lo insns
-//===----------------------------------------------------------------------===//
-
-// A4_boundscheck_lo: Detect if a register is within bounds.
-let hasSideEffects = 0 in
-def A4_boundscheck_lo: ALU64Inst <
- (outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Pd = boundscheck($Rss, $Rtt):raw:lo"> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00100;
- let Inst{13} = 0b1;
- let Inst{7-5} = 0b100;
- let Inst{1-0} = Pd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// A4_boundscheck_hi: Detect if a register is within bounds.
-let hasSideEffects = 0 in
-def A4_boundscheck_hi: ALU64Inst <
- (outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Pd = boundscheck($Rss, $Rtt):raw:hi"> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00100;
- let Inst{13} = 0b1;
- let Inst{7-5} = 0b101;
- let Inst{1-0} = Pd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-let hasSideEffects = 0, isAsmParserOnly = 1 in
-def A4_boundscheck : MInst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
- "$Pd=boundscheck($Rs,$Rtt)">;
-
-// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry.
-let isPredicateLate = 1, hasSideEffects = 0 in
-def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd),
- (ins DoubleRegs:$Rs, IntRegs:$Rt),
- "$Pd = tlbmatch($Rs, $Rt)",
- [], "", ALU64_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-23} = 0b00100;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b1;
- let Inst{12-8} = Rt;
- let Inst{7-5} = 0b011;
- let Inst{1-0} = Pd;
- }
-
-// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
-// really do a load.
-let hasSideEffects = 1, mayLoad = 0 in
-def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
- "dcfetch($Rs + #$u11_3)",
- [], "", LD_tc_ld_SLOT0> {
- bits<5> Rs;
- bits<14> u11_3;
-
- let IClass = 0b1001;
- let Inst{27-21} = 0b0100000;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{10-0} = u11_3{13-3};
-}
-
-
-//===----------------------------------------------------------------------===//
-// Compound instructions
-//===----------------------------------------------------------------------===//
-
-let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
- isPredicated = 1, isPredicatedNew = 1, isExtendable = 1,
- opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
- isTerminator = 1 in
-class CJInst_tstbit_R0<string px, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
- ""#px#" = tstbit($Rs, #0); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-26} = 0b00;
- let Inst{25} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
- let Inst{24-23} = 0b11;
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- let Inst{9-8} = 0b11;
- let Inst{7-1} = r9_2{8-2};
-}
-
-let Defs = [PC, P0], Uses = [P0] in {
- def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">;
- def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">;
- def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">;
- def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">;
-}
-
-let Defs = [PC, P1], Uses = [P1] in {
- def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">;
- def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">;
- def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">;
- def J4_tstbit0_fp1_jump_t : CJInst_tstbit_R0<"p1", 1, "t">;
-}
-
-
-let isBranch = 1, hasSideEffects = 0,
- isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1,
- isExtendable = 1, opExtentBits = 11, opExtentAlign = 2,
- opExtendable = 2, isTerminator = 1 in
-class CJInst_RR<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
- ""#px#" = cmp."#op#"($Rs, $Rt); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<4> Rt;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-23} = !if (!eq(op, "eq"), 0b01000,
- !if (!eq(op, "gt"), 0b01001,
- !if (!eq(op, "gtu"), 0b01010, 0)));
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- // px: Predicate reg 0/1
- let Inst{12} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
- let Inst{11-8} = Rt;
- let Inst{7-1} = r9_2{8-2};
-}
-
-// P[10] taken/not taken.
-multiclass T_tnt_CJInst_RR<string op, bit np> {
- let Defs = [PC, P0], Uses = [P0] in {
- def NAME#p0_jump_nt : CJInst_RR<"p0", op, np, "nt">;
- def NAME#p0_jump_t : CJInst_RR<"p0", op, np, "t">;
- }
- let Defs = [PC, P1], Uses = [P1] in {
- def NAME#p1_jump_nt : CJInst_RR<"p1", op, np, "nt">;
- def NAME#p1_jump_t : CJInst_RR<"p1", op, np, "t">;
- }
-}
-// Predicate / !Predicate
-multiclass T_pnp_CJInst_RR<string op>{
- defm J4_cmp#NAME#_t : T_tnt_CJInst_RR<op, 0>;
- defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>;
-}
-// TypeCJ Instructions compare RR and jump
-defm eq : T_pnp_CJInst_RR<"eq">;
-defm gt : T_pnp_CJInst_RR<"gt">;
-defm gtu : T_pnp_CJInst_RR<"gtu">;
-
-let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
- isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in
-class CJInst_RU5<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, u5_0Imm:$U5, brtarget:$r9_2),
- ""#px#" = cmp."#op#"($Rs, #$U5); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<5> U5;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-26} = 0b00;
- // px: Predicate reg 0/1
- let Inst{25} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
- let Inst{24-23} = !if (!eq(op, "eq"), 0b00,
- !if (!eq(op, "gt"), 0b01,
- !if (!eq(op, "gtu"), 0b10, 0)));
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- let Inst{12-8} = U5;
- let Inst{7-1} = r9_2{8-2};
-}
-// P[10] taken/not taken.
-multiclass T_tnt_CJInst_RU5<string op, bit np> {
- let Defs = [PC, P0], Uses = [P0] in {
- def NAME#p0_jump_nt : CJInst_RU5<"p0", op, np, "nt">;
- def NAME#p0_jump_t : CJInst_RU5<"p0", op, np, "t">;
- }
- let Defs = [PC, P1], Uses = [P1] in {
- def NAME#p1_jump_nt : CJInst_RU5<"p1", op, np, "nt">;
- def NAME#p1_jump_t : CJInst_RU5<"p1", op, np, "t">;
- }
-}
-// Predicate / !Predicate
-multiclass T_pnp_CJInst_RU5<string op>{
- defm J4_cmp#NAME#i_t : T_tnt_CJInst_RU5<op, 0>;
- defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>;
-}
-// TypeCJ Instructions compare RI and jump
-defm eq : T_pnp_CJInst_RU5<"eq">;
-defm gt : T_pnp_CJInst_RU5<"gt">;
-defm gtu : T_pnp_CJInst_RU5<"gtu">;
-
-let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
- isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1,
- isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 2,
- isTerminator = 1 in
-class CJInst_Rn1<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, n1Const:$n1, brtarget:$r9_2),
- ""#px#" = cmp."#op#"($Rs,#$n1); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-26} = 0b00;
- let Inst{25} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
-
- let Inst{24-23} = 0b11;
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- let Inst{9-8} = !if (!eq(op, "eq"), 0b00,
- !if (!eq(op, "gt"), 0b01, 0));
- let Inst{7-1} = r9_2{8-2};
-}
-
-// P[10] taken/not taken.
-multiclass T_tnt_CJInst_Rn1<string op, bit np> {
- let Defs = [PC, P0], Uses = [P0] in {
- def NAME#p0_jump_nt : CJInst_Rn1<"p0", op, np, "nt">;
- def NAME#p0_jump_t : CJInst_Rn1<"p0", op, np, "t">;
- }
- let Defs = [PC, P1], Uses = [P1] in {
- def NAME#p1_jump_nt : CJInst_Rn1<"p1", op, np, "nt">;
- def NAME#p1_jump_t : CJInst_Rn1<"p1", op, np, "t">;
- }
-}
-// Predicate / !Predicate
-multiclass T_pnp_CJInst_Rn1<string op>{
- defm J4_cmp#NAME#n1_t : T_tnt_CJInst_Rn1<op, 0>;
- defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>;
-}
-// TypeCJ Instructions compare -1 and jump
-defm eq : T_pnp_CJInst_Rn1<"eq">;
-defm gt : T_pnp_CJInst_Rn1<"gt">;
-
-// J4_jumpseti: Direct unconditional jump and set register to immediate.
-let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
- isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2 in
-def J4_jumpseti: CJInst_JMPSET <
- (outs IntRegs:$Rd),
- (ins u6_0Imm:$U6, brtarget:$r9_2),
- "$Rd = #$U6 ; jump $r9_2"> {
- bits<4> Rd;
- bits<6> U6;
- bits<11> r9_2;
-
- let IClass = 0b0001;
- let Inst{27-24} = 0b0110;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rd;
- let Inst{13-8} = U6;
- let Inst{7-1} = r9_2{8-2};
- }
-
-// J4_jumpsetr: Direct unconditional jump and transfer register.
-let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
- isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2 in
-def J4_jumpsetr: CJInst_JMPSET <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, brtarget:$r9_2),
- "$Rd = $Rs ; jump $r9_2"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<11> r9_2;
-
- let IClass = 0b0001;
- let Inst{27-24} = 0b0111;
- let Inst{21-20} = r9_2{10-9};
- let Inst{11-8} = Rd;
- let Inst{19-16} = Rs;
- let Inst{7-1} = r9_2{8-2};
- }
-
-// Duplex instructions
-//===----------------------------------------------------------------------===//
-include "HexagonIsetDx.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
deleted file mode 100644
index cd19b6916f21..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ /dev/null
@@ -1,497 +0,0 @@
-//=- HexagonInstrInfoV5.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V5 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/MPY
-//===----------------------------------------------------------------------===//
-
- //Rdd[+]=vrmpybsu(Rss,Rtt)
-let Predicates = [HasV5T] in {
- def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>;
- def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>;
-
- //Rdd[+]=vrmpybu(Rss,Rtt)
- def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>;
- def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>;
-
- def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>;
- def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>;
-}
-
-// Vector multiply bytes
-// Rdd=vmpyb[s]u(Rs,Rt)
-let Predicates = [HasV5T] in {
- def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>;
- def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>;
-
- // Rxx+=vmpyb[s]u(Rs,Rt)
- def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>;
- def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>;
-
- // Rd=vaddhub(Rss,Rtt):sat
- let hasNewValue = 1, opNewValue = 0 in
- def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
-}
-
-def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm, [], 1>,
- Requires<[HasV5T]> {
- bits<6> src2;
- let Inst{13-8} = src2;
-}
-
-let isAsmParserOnly = 1 in
-def S2_asr_i_p_rnd_goodsyntax
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6_0Imm:$src2),
- "$dst = asrrnd($src1, #$src2)">;
-
-def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>,
- Requires<[HasV5T]> {
- let Inst{13,7,4} = 0b111;
-}
-
-def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>,
- Requires<[HasV5T]> {
- let Inst{20,13,7,4} = 0b1111;
-}
-
-let hasNewValue = 1, validSubTargets = HasV5SubT in
-def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = popcount($Rss)", [], "", S_2op_tc_2_SLOT23>,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rss;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1000011;
- let Inst{7-5} = 0b011;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rss;
- }
-
-let isFP = 1, hasNewValue = 1, opNewValue = 0 in
-class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
- : MInst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#mnemonic#"($Rs, $Rt)", [],
- "" , M_tc_3or4x_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-let isCommutable = 1 in {
- def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>;
- def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>;
-}
-
-def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
-
-let Itinerary = M_tc_3x_SLOT23 in {
- def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
- def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
-}
-
-let Itinerary = M_tc_3or4x_SLOT23 in {
-def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
-def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
-}
-
-// F2_sfrecipa: Reciprocal approximation for division.
-let Uses = [USR], isPredicateLate = 1, isFP = 1,
- hasSideEffects = 0, hasNewValue = 1, Itinerary = M_tc_3or4x_SLOT23 in
-def F2_sfrecipa: MInst <
- (outs IntRegs:$Rd, PredRegs:$Pe),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd, $Pe = sfrecipa($Rs, $Rt)">,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<2> Pe;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
- let Inst{27-21} = 0b1011111;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b1;
- let Inst{6-5} = Pe;
- let Inst{4-0} = Rd;
- }
-
-// F2_dfcmpeq: Floating point compare for equal.
-let Uses = [USR], isCompare = 1, isFP = 1 in
-class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp,
- list<dag> pattern = [] >
- : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2),
- "$dst = "#mnemonic#"($src1, $src2)", pattern,
- "" , ALU64_tc_2early_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<2> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1101;
-
- let Inst{27-21} = 0b0010111;
- let Inst{20-16} = src1;
- let Inst{12-8} = src2;
- let Inst{7-5} = MinOp;
- let Inst{1-0} = dst;
- }
-
-class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, DoubleRegs, MinOp, []> {
- let IClass = 0b1101;
- let Inst{27-21} = 0b0010111;
-}
-
-class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, IntRegs, MinOp, []> {
- let IClass = 0b1100;
- let Inst{27-21} = 0b0111111;
-}
-
-def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>;
-def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>;
-def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>;
-def F2_dfcmpuo : T_fcmp64<"dfcmp.uo", setuo, 0b011>;
-
-def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>;
-def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>;
-def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
-def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
-
-// F2 convert template classes:
-let Uses = [USR], isFP = 1 in
-class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- string chop ="">
- : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
- "$Rdd = "#mnemonic#"($Rss)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rdd;
- bits<5> Rss;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b0000111;
- let Inst{20-16} = Rss;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- }
-
-let Uses = [USR], isFP = 1 in
-class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
- string chop ="">
- : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs),
- "$Rdd = "#mnemonic#"($Rs)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rdd;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b0100100;
- let Inst{20-16} = Rs;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- }
-
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- string chop ="">
- : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = "#mnemonic#"($Rss)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rd;
- bits<5> Rss;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MinOp;
- let Inst{20-16} = Rss;
- let Inst{7-5} = 0b001;
- let Inst{4-0} = Rd;
- }
-
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- string chop ="">
- : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-// Convert single precision to double precision and vice-versa.
-def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000>;
-def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000>;
-
-// Convert Integer to Floating Point.
-def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010>;
-def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001>;
-def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000>;
-def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000>;
-def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011>;
-def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010>;
-def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001>;
-def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010>;
-
-// Convert Floating Point to Integer.
-def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, ":chop">;
-def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, ":chop">;
-def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001,
- ":chop">;
-def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001,
- ":chop">;
-def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, ":chop">;
-def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, ":chop">;
-def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, ":chop">;
-def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, ":chop">;
-
-// Convert Floating Point to Integer: non-chopped.
-let AddedComplexity = 20, Predicates = [HasV5T] in {
- def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000>;
- def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001>;
- def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011>;
- def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100>;
- def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011>;
- def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100>;
- def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000>;
- def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000>;
-}
-
-// Fix up radicand.
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = sffixupr($Rs)",
- [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = Rs;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rd;
- }
-
-// F2_sffma: Floating-point fused multiply add.
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-class T_sfmpy_acc <bit isSub, bit isLib>
- : MInst<(outs IntRegs:$Rx),
- (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""),
- [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-21} = 0b1111000;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b1;
- let Inst{6} = isLib;
- let Inst{5} = isSub;
- let Inst{4-0} = Rx;
- }
-
-def F2_sffma: T_sfmpy_acc <0, 0>;
-def F2_sffms: T_sfmpy_acc <1, 0>;
-def F2_sffma_lib: T_sfmpy_acc <0, 1>;
-def F2_sffms_lib: T_sfmpy_acc <1, 1>;
-
-// Floating-point fused multiply add w/ additional scaling (2**pu).
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-def F2_sffma_sc: MInst <
- (outs IntRegs:$Rx),
- (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
- "$Rx += sfmpy($Rs, $Rt, $Pu):scale" ,
- [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
- bits<2> Pu;
-
- let IClass = 0b1110;
-
- let Inst{27-21} = 0b1111011;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b1;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rx;
- }
-
-//===----------------------------------------------------------------------===//
-// :natural forms of vasrh and vasrhub insns
-//===----------------------------------------------------------------------===//
-// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round,
-// saturate, and pack.
-let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_ASRHUB<bit isSat>
- : SInst <(outs IntRegs:$Rd),
- (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"),
- [], "", S_2op_tc_2_SLOT23>,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rss;
- bits<4> u4;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1000011;
- let Inst{20-16} = Rss;
- let Inst{13-12} = 0b00;
- let Inst{11-8} = u4;
- let Inst{7-6} = 0b10;
- let Inst{5} = isSat;
- let Inst{4-0} = Rd;
- }
-
-def S5_asrhub_rnd_sat : T_ASRHUB <0>;
-def S5_asrhub_sat : T_ASRHUB <1>;
-
-let isAsmParserOnly = 1 in
-def S5_asrhub_rnd_sat_goodsyntax
- : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>;
-
-// S5_vasrhrnd: Vector arithmetic shift right by immediate with round.
-let hasSideEffects = 0 in
-def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rdd = vasrh($Rss, #$u4):raw">,
- Requires<[HasV5T]> {
- bits<5> Rdd;
- bits<5> Rss;
- bits<4> u4;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b0000001;
- let Inst{20-16} = Rss;
- let Inst{13-12} = 0b00;
- let Inst{11-8} = u4;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rdd;
- }
-
-let isAsmParserOnly = 1 in
-def S5_vasrhrnd_goodsyntax
- : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>;
-
-// Floating point reciprocal square root approximation
-let Uses = [USR], isPredicateLate = 1, isFP = 1,
- hasSideEffects = 0, hasNewValue = 1, opNewValue = 0,
- validSubTargets = HasV5SubT in
-def F2_sfinvsqrta: SInst <
- (outs IntRegs:$Rd, PredRegs:$Pe),
- (ins IntRegs:$Rs),
- "$Rd, $Pe = sfinvsqrta($Rs)" > ,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<2> Pe;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1011111;
- let Inst{20-16} = Rs;
- let Inst{7} = 0b0;
- let Inst{6-5} = Pe;
- let Inst{4-0} = Rd;
- }
-
-// Complex multiply 32x16
-let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
- def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>;
- def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>;
-}
-
-// Classify floating-point value
-let Uses = [USR], isFP = 1 in
-def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>, Requires<[HasV5T]>;
-
-let Uses = [USR], isFP = 1 in
-def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5_0Imm:$u5),
- "$Pd = dfclass($Rss, #$u5)",
- [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> u5;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b1100100;
- let Inst{20-16} = Rss;
- let Inst{12-10} = 0b000;
- let Inst{9-5} = u5;
- let Inst{4-3} = 0b10;
- let Inst{1-0} = Pd;
- }
-
-// Instructions to create floating-point constants
-class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
- : ALU64Inst<(outs RC:$dst), (ins u10_0Imm:$src),
- "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
- [], "", ALU64_tc_2_SLOT23>, Requires<[HasV5T]> {
- bits<5> dst;
- bits<10> src;
-
- let IClass = 0b1101;
- let Inst{27-24} = RegType;
- let Inst{23} = 0b0;
- let Inst{22} = isNeg;
- let Inst{21} = src{9};
- let Inst{13-5} = src{8-0};
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1, opNewValue = 0 in {
- def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
- def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
-}
-
-def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
-def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV60.td b/lib/Target/Hexagon/HexagonInstrInfoV60.td
deleted file mode 100644
index c50141b18ead..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV60.td
+++ /dev/null
@@ -1,2068 +0,0 @@
-//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V60 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-// Vector load
-let Predicates = [HasV60T, UseHVX] in
-let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
- class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = CVI_VM_LD,
- IType type = TypeCVI_VM_LD>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
-
-// Vector store
-let Predicates = [HasV60T, UseHVX] in
-let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
-class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = CVI_VM_ST,
- IType type = TypeCVI_VM_ST>
-: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
-
-//===----------------------------------------------------------------------===//
-// Vector loads with base + immediate offset
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, accessSize = Vector64Access in
-class T_vload_ai<string asmStr>
- : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2),
- asmStr>;
-
-let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in
-class T_vload_ai_128B<string asmStr>
- : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2),
- asmStr>;
-
-let isCVLoadable = 1, hasNewValue = 1 in {
- def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">,
- V6_vL32b_ai_enc;
- def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_ai_enc;
- // 128B
- def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">,
- V6_vL32b_ai_128B_enc;
- def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in {
- def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">,
- V6_vL32Ub_ai_enc;
- def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">,
- V6_vL32Ub_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1,
- hasNewValue = 1 in {
- def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">,
- V6_vL32b_cur_ai_enc;
- def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_cur_ai_enc;
- // 128B
- def V6_vL32b_cur_ai_128B : T_vload_ai_128B
- <"$dst.cur = vmem($src1+#$src2)">,
- V6_vL32b_cur_ai_128B_enc;
- def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B
- <"$dst.cur = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_cur_ai_128B_enc;
-}
-
-
-let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in {
- def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">,
- V6_vL32b_tmp_ai_enc;
- def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_tmp_ai_enc;
- // 128B
- def V6_vL32b_tmp_ai_128B : T_vload_ai_128B
- <"$dst.tmp = vmem($src1+#$src2)">,
- V6_vL32b_tmp_ai_128B_enc;
- def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B
- <"$dst.tmp = vmem($src1+#$src2)">,
- V6_vL32b_nt_tmp_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - unconditional
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, accessSize = Vector64Access, isPredicable = 1 in
-class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isNT>
- : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
-
-let isNVStorable = 1 in {
- def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">,
- V6_vS32b_ai_enc;
- def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">,
- V6_vS32b_ai_128B_enc;
-}
-
-let isNVStorable = 1, isNonTemporal = 1 in {
- def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_nt_ai_enc;
- def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_nt_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_ai_enc;
- def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_ai_128B_enc;
-}
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - unconditional new
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1,
- isPredicable = 1, Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in
-class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
- : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_ai_64B <string baseOp, bit isNT = 0>
- : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_ai_128B <string baseOp, bit isNT = 0>
- : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
-
-def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc;
-def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">,
- V6_vS32b_new_ai_128B_enc;
-
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>,
- V6_vS32b_nt_new_ai_enc;
- def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>,
- V6_vS32b_nt_new_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - conditional
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, isPredicated = 1 in
-class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) "
- #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_pred_ai_64B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_pred_ai_128B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-let isNVStorable = 1 in {
- def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">,
- V6_vS32b_pred_ai_enc;
- def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_npred_ai_enc;
- // 128B
- def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">,
- V6_vS32b_pred_ai_128B_enc;
- def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_npred_ai_128B_enc;
-}
-
-
-let isNVStorable = 1, isNonTemporal = 1 in {
- def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>,
- V6_vS32b_nt_pred_ai_enc;
- def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>,
- V6_vS32b_nt_npred_ai_enc;
- // 128B
- def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B
- <"vmem", "vS32b_ai", 0, 1>,
- V6_vS32b_nt_pred_ai_128B_enc;
- def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B
- <"vmem", "vS32b_ai", 1, 1>,
- V6_vS32b_nt_npred_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_pred_ai_enc;
- def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>,
- V6_vS32Ub_npred_ai_enc;
- // 128B
- def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_pred_ai_128B_enc;
- def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>,
- V6_vS32Ub_npred_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - byte-enabled aligned
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset in
-class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC,
- bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs),
- (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
- #!if(isNT, ":nt", "")#" = $src4"> {
- let isPredicatedFalse = isPredNot;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>;
-
-def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc;
-def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>,
- V6_vS32b_nqpred_ai_enc;
-def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>,
- V6_vS32b_nt_qpred_ai_enc;
-def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>,
- V6_vS32b_nt_nqpred_ai_enc;
-// 128B
-def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc;
-def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>,
- V6_vS32b_nqpred_ai_128B_enc;
-def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>,
- V6_vS32b_nt_qpred_ai_128B_enc;
-def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>,
- V6_vS32b_nt_nqpred_ai_128B_enc;
-
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - conditional new
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3,
- isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in
-class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC,
- bit isPredNot, bit isNT>
- : V6_STInst <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
- #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-
-def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">,
- V6_vS32b_new_pred_ai_enc;
-def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>,
- V6_vS32b_new_npred_ai_enc;
-// 128B
-def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">,
- V6_vS32b_new_pred_ai_128B_enc;
-def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>,
- V6_vS32b_new_npred_ai_128B_enc;
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>,
- V6_vS32b_nt_new_pred_ai_enc;
- def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>,
- V6_vS32b_nt_new_npred_ai_enc;
- // 128B
- def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B
- <"vS32b_ai", 0, 1>,
- V6_vS32b_nt_new_pred_ai_128B_enc;
- def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B
- <"vS32b_ai", 1, 1>,
- V6_vS32b_nt_new_npred_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc, hasNewValue = 1 in
-class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC>
- : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$src2), asmStr, [],
- "$src1 = $_dst_">;
-
-let accessSize = Vector64Access in
-class T_vload_pi_64B <string asmStr>
- : T_vload_pi <asmStr, s3_6Imm, VectorRegs>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vload_pi_128B <string asmStr>
- : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>;
-
-let isCVLoadable = 1 in {
- def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">,
- V6_vL32b_pi_enc;
- def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_pi_enc;
- // 128B
- def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">,
- V6_vL32b_pi_128B_enc;
- def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_pi_128B_enc;
-}
-
-let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in {
- def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">,
- V6_vL32Ub_pi_enc;
- // 128B
- def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">,
- V6_vL32Ub_pi_128B_enc;
-}
-
-let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in {
- def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">,
- V6_vL32b_cur_pi_enc;
- def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_cur_pi_enc;
- // 128B
- def V6_vL32b_cur_pi_128B : T_vload_pi_128B
- <"$dst.cur = vmem($src1++#$src2)">,
- V6_vL32b_cur_pi_128B_enc;
- def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B
- <"$dst.cur = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_cur_pi_128B_enc;
-}
-
-let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
- def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">,
- V6_vL32b_tmp_pi_enc;
- def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_tmp_pi_enc;
- //128B
- def V6_vL32b_tmp_pi_128B : T_vload_pi_128B
- <"$dst.tmp = vmem($src1++#$src2)">,
- V6_vL32b_tmp_pi_128B_enc;
- def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B
- <"$dst.tmp = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_tmp_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with immediate offset.
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc, isPredicable = 1 in
-class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isNT>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
- "$src1 = $_dst_">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
-
-let isNVStorable = 1 in {
- def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc;
- def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">,
- V6_vS32b_pi_128B_enc;
-}
-
-let isNVStorable = 1 , isNonTemporal = 1 in {
- def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_nt_pi_enc;
- def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_nt_pi_128B_enc;
-}
-
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pi_enc;
- def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment unconditional .new vector stores with immediate offset.
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc, isNVStore = 1 in
-let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
- isPredicable = 1, opNewValue = 3, isNVStore = 1 in
-class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
- "$src1 = $_dst_">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_pi_64B <string baseOp, bit isNT = 0>
- : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_pi_128B <string baseOp, bit isNT = 0>
- : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
-
-
-def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">,
- V6_vS32b_new_pi_enc;
-def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">,
- V6_vS32b_new_pi_128B_enc;
-
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>,
- V6_vS32b_nt_new_pi_enc;
- def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>,
- V6_vS32b_nt_new_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional vector stores with immediate offset
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, addrMode = PostInc in
-class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isPredNot, bit isNT>
- : V6_STInst<(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">, NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_pred_pi_64B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_pred_pi_128B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-let isNVStorable = 1 in {
- def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">,
- V6_vS32b_pred_pi_enc;
- def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_npred_pi_enc;
- // 128B
- def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">,
- V6_vS32b_pred_pi_128B_enc;
- def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_npred_pi_128B_enc;
-}
-let isNVStorable = 1, isNonTemporal = 1 in {
- def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>,
- V6_vS32b_nt_pred_pi_enc;
- def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>,
- V6_vS32b_nt_npred_pi_enc;
- // 128B
- def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B
- <"vmem", "vS32b_pi", 0, 1>,
- V6_vS32b_nt_pred_pi_128B_enc;
- def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B
- <"vmem", "vS32b_pi", 1, 1>,
- V6_vS32b_nt_npred_pi_128B_enc;
-}
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pred_pi_enc;
- def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>,
- V6_vS32Ub_npred_pi_enc;
- // 128B
- def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pred_pi_128B_enc;
- def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>,
- V6_vS32Ub_npred_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with immediate offset - byte-enabled aligned
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc in
-class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0,
- bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">;
-
-let accessSize = Vector64Access in
-class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>;
-
-def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc;
-def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc;
-// 128B
-def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B,
- V6_vS32b_qpred_pi_128B_enc;
-def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>,
- V6_vS32b_nqpred_pi_128B_enc;
-
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>,
- V6_vS32b_nt_qpred_pi_enc;
- def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>,
- V6_vS32b_nt_nqpred_pi_enc;
- // 128B
- def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>,
- V6_vS32b_nt_qpred_pi_128B_enc;
- def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>,
- V6_vS32b_nt_nqpred_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional .new vector stores with immediate offset
-//===----------------------------------------------------------------------===//
-let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
- isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in
-class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC,
- bit isPredNot, bit isNT>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
- #!if(isNT, ":nt", "")#" = $src4.new", [],
- "$src2 = $_dst_"> , NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">,
- V6_vS32b_new_pred_pi_enc;
-def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>,
- V6_vS32b_new_npred_pi_enc;
-// 128B
-def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">,
- V6_vS32b_new_pred_pi_128B_enc;
-def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>,
- V6_vS32b_new_npred_pi_128B_enc;
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>,
- V6_vS32b_nt_new_pred_pi_enc;
- def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>,
- V6_vS32b_nt_new_npred_pi_enc;
- // 128B
- def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B
- <"vS32b_pi", 0, 1>,
- V6_vS32b_nt_new_pred_pi_128B_enc;
- def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B
- <"vS32b_pi", 1, 1>,
- V6_vS32b_nt_new_npred_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector loads with register offset
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1 in
-class T_vload_ppu<string asmStr>
- : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2), asmStr, [],
- "$src1 = $_dst_">, NewValueRel;
-
-let isCVLoadable = 1 in {
- def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">,
- V6_vL32b_ppu_enc;
- def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">,
- V6_vL32b_nt_ppu_enc;
-}
-
-let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in
-def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">,
- V6_vL32Ub_ppu_enc;
-
-let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
- def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">,
- V6_vL32b_cur_ppu_enc;
- def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">,
- V6_vL32b_nt_cur_ppu_enc;
-}
-
-let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
- def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">,
- V6_vL32b_tmp_ppu_enc;
- def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">,
- V6_vL32b_nt_tmp_ppu_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with register offset
-//===----------------------------------------------------------------------===//
-let isPredicable = 1 in
-class T_vstore_ppu <string mnemonic, bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
- mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
- "$src1 = $_dst_">, NewValueRel;
-
-let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
- def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
- V6_vS32b_ppu_enc;
- let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
- def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
- V6_vS32b_nt_ppu_enc;
-}
-
-let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
-def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
-
-//===----------------------------------------------------------------------===//
-// Post increment .new vector stores with register offset
-//===----------------------------------------------------------------------===//
-let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
- isPredicable = 1, opNewValue = 3, isNVStore = 1 in
-class T_vstore_new_ppu <bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
- "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
- "$src1 = $_dst_">, NewValueRel;
-
-let BaseOpcode = "vS32b_ppu" in
-def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
-
-let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
-def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional vector stores with register offset
-//===----------------------------------------------------------------------===//
-let isPredicated = 1 in
-class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
- : V6_STInst<(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">, NewValueRel {
- let isPredicatedFalse = isPredNot;
-}
-
-let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
- def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
- def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
-}
-
-let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
- def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
- V6_vS32b_nt_pred_ppu_enc;
- def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
- V6_vS32b_nt_npred_ppu_enc;
-}
-
-let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
- Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
- V6_vS32Ub_pred_ppu_enc;
- def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
- V6_vS32Ub_npred_ppu_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with register offset - byte-enabled aligned
-//===----------------------------------------------------------------------===//
-class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">, NewValueRel;
-
-def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc;
-def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc;
-def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>,
- V6_vS32b_nt_qpred_ppu_enc;
-def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>,
- V6_vS32b_nt_nqpred_ppu_enc;
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional .new vector stores with register offset
-//===----------------------------------------------------------------------===//
-let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
- isNewValue = 1, opNewValue = 4, isNVStore = 1 in
-class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
- "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
- #!if(isNT, ":nt", "")#" = $src4.new", [],
- "$src2 = $_dst_">, NewValueRel {
- let isPredicatedFalse = isPredNot;
-}
-
-let BaseOpcode = "vS32b_ppu" in {
- def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu,
- V6_vS32b_new_pred_ppu_enc;
- def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>,
- V6_vS32b_new_npred_ppu_enc;
-}
-
-let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
-def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>,
- V6_vS32b_nt_new_pred_ppu_enc;
-def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
- V6_vS32b_nt_new_npred_ppu_enc;
-}
-
-
-// Vector load/store pseudos
-
-let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
-class STrivv_template<RegisterClass RC>
- : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>;
-
-def PS_vstorerw_ai: STrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vstorerwu_ai: STrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-
-
-let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
-class LDrivv_template<RegisterClass RC>
- : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>;
-
-def PS_vloadrw_ai: LDrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vloadrwu_ai: LDrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-
-// Store vector predicate pseudo.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
- def PS_vstorerq_ai : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_vstorerq_ai_128B : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
-
-// Load vector predicate pseudo.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
- def PS_vloadrq_ai : LDInst<(outs VecPredRegs:$dst),
- (ins IntRegs:$base, s32_0Imm:$offset),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_vloadrq_ai_128B : LDInst<(outs VecPredRegs128B:$dst),
- (ins IntRegs:$base, s32_0Imm:$offset),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
-
-class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = CVI_VA_DV,
- IType type = TypeCVI_VA_DV>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
-
-let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
- def PS_vselect: VSELInst<(outs VectorRegs:$dst),
- (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), "", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst),
- (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3),
- "", []>, Requires<[HasV60T,UseHVXDbl]>;
- def PS_wselect: VSELInst<(outs VecDblRegs:$dst),
- (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), "", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst),
- (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3),
- "", []>, Requires<[HasV60T,UseHVXDbl]>;
-}
-
-let hasNewValue = 1 in
-class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
- asmString >;
-
-multiclass T_vmpy <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_vmpy <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_vmpy_VV <string asmString>:
- T_vmpy <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_vmpy_WW <string asmString>:
- T_vmpy <asmString, VecDblRegs, VecDblRegs>;
-
-multiclass T_vmpy_VW <string asmString>:
- T_vmpy <asmString, VectorRegs, VecDblRegs>;
-
-multiclass T_vmpy_WV <string asmString>:
- T_vmpy <asmString, VecDblRegs, VectorRegs>;
-
-defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc;
-defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc;
-defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc;
-defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc;
-defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc;
-defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc;
-defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc;
-defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc;
-defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc;
-defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc;
-defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc;
-
-let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in
-defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc;
-
-defm V6_vdmpybus_dv :
- T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc;
-defm V6_vdmpyhsusat :
- T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc;
-defm V6_vdmpyhsuisat :
- T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc;
-defm V6_vdmpyhsat :
- T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc;
-defm V6_vdmpyhisat :
- T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc;
-defm V6_vdmpyhb_dv :
- T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc;
-defm V6_vmpyhss :
- T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc;
-defm V6_vmpyhsrs :
- T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc;
-
-let Itinerary = CVI_VP, Type = TypeCVI_VP in
-defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc;
-
-let Itinerary = CVI_VX, Type = TypeCVI_VX in {
-defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc;
-defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc;
-defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc;
-defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc;
-defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc;
-defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc;
-defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc;
-defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc;
-defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc;
-defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc;
-}
-
-let hasNewValue = 1 in
-class T_HVX_alu <string asmString, InstrItinClass itin,
- RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
- asmString >{
- let Itinerary = itin;
- let Type = !cast<IType>("Type"#itin);
-}
-
-multiclass T_HVX_alu <string asmString, RegisterClass RCout,
- RegisterClass RCin, InstrItinClass itin> {
- def NAME : T_HVX_alu <asmString, itin, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_alu <asmString, itin,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_alu_VV <string asmString>:
- T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>;
-
-multiclass T_HVX_alu_WW <string asmString>:
- T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>;
-
-multiclass T_HVX_alu_WV <string asmString>:
- T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>;
-
-
-let Itinerary = CVI_VX, Type = TypeCVI_VX in {
-defm V6_vrmpyubv :
- T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc;
-defm V6_vrmpybv :
- T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc;
-defm V6_vrmpybusv :
- T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc;
-defm V6_vabsdiffub :
- T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc;
-defm V6_vabsdiffh :
- T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc;
-defm V6_vabsdiffuh :
- T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc;
-defm V6_vabsdiffw :
- T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc;
-}
-
-let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
-defm V6_vdmpyhvsat :
- T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc;
-defm V6_vmpyhvsrs :
- T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc;
-defm V6_vmpyih :
- T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc;
-}
-
-defm V6_vand :
- T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc;
-defm V6_vor :
- T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc;
-defm V6_vxor :
- T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc;
-defm V6_vaddw :
- T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc;
-defm V6_vaddubsat :
- T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc;
-defm V6_vadduhsat :
- T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc;
-defm V6_vaddhsat :
- T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc;
-defm V6_vaddwsat :
- T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc;
-defm V6_vsubb :
- T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc;
-defm V6_vsubh :
- T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc;
-defm V6_vsubw :
- T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc;
-defm V6_vsububsat :
- T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc;
-defm V6_vsubuhsat :
- T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc;
-defm V6_vsubhsat :
- T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc;
-defm V6_vsubwsat :
- T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc;
-defm V6_vavgub :
- T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc;
-defm V6_vavguh :
- T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc;
-defm V6_vavgh :
- T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc;
-defm V6_vavgw :
- T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc;
-defm V6_vnavgub :
- T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc;
-defm V6_vnavgh :
- T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc;
-defm V6_vnavgw :
- T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc;
-defm V6_vavgubrnd :
- T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc;
-defm V6_vavguhrnd :
- T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc;
-defm V6_vavghrnd :
- T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc;
-defm V6_vavgwrnd :
- T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc;
-
-defm V6_vmpybv :
- T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc;
-defm V6_vmpyubv :
- T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc;
-defm V6_vmpybusv :
- T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc;
-defm V6_vmpyhv :
- T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc;
-defm V6_vmpyuhv :
- T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc;
-defm V6_vmpyhus :
- T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc;
-defm V6_vaddubh :
- T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc;
-defm V6_vadduhw :
- T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc;
-defm V6_vaddhw :
- T_HVX_alu_WV <"$dst.w = vadd($src1.h,$src2.h)">, V6_vaddhw_enc;
-defm V6_vsububh :
- T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc;
-defm V6_vsubuhw :
- T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc;
-defm V6_vsubhw :
- T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc;
-
-defm V6_vaddb_dv :
- T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc;
-defm V6_vaddh_dv :
- T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc;
-defm V6_vaddw_dv :
- T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc;
-defm V6_vaddubsat_dv :
- T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc;
-defm V6_vadduhsat_dv :
- T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc;
-defm V6_vaddhsat_dv :
- T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc;
-defm V6_vaddwsat_dv :
- T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc;
-defm V6_vsubb_dv :
- T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc;
-defm V6_vsubh_dv :
- T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc;
-defm V6_vsubw_dv :
- T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc;
-defm V6_vsububsat_dv :
- T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc;
-defm V6_vsubuhsat_dv :
- T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc;
-defm V6_vsubhsat_dv :
- T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc;
-defm V6_vsubwsat_dv :
- T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc;
-
-let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in {
-defm V6_vmpabusv :
- T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc;
-defm V6_vmpabuuv :
- T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc;
-}
-
-let isAccumulator = 1, hasNewValue = 1 in
-class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout,
- RegisterClass RCin1, RegisterClass RCin2>
- : CVI_VA_Resource1 <(outs RCout:$dst),
- (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString,
- [], "$dst = $_src_" > {
- let Itinerary = itin;
- let Type = !cast<IType>("Type"#itin);
-}
-
-multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout,
- RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > {
- def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vmpyacc <asmString, itin,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin1#"128B"),
- !cast<RegisterClass>(RCin2#
- !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>;
-}
-
-multiclass T_HVX_vmpyacc_VVR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>;
-
-multiclass T_HVX_vmpyacc_VWR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_WVR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_WWR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_VVV <string asmString>:
- T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_WVV <string asmString>:
- T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
-
-
-defm V6_vtmpyb_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.b,$src2.b)">,
- V6_vtmpyb_acc_enc;
-defm V6_vtmpybus_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">,
- V6_vtmpybus_acc_enc;
-defm V6_vtmpyhb_acc :
- T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">,
- V6_vtmpyhb_acc_enc;
-defm V6_vdmpyhb_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">,
- V6_vdmpyhb_acc_enc;
-defm V6_vrmpyub_acc :
- T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
- V6_vrmpyub_acc_enc;
-defm V6_vrmpybus_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">,
- V6_vrmpybus_acc_enc;
-defm V6_vdmpybus_acc :
- T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
- V6_vdmpybus_acc_enc;
-defm V6_vdmpybus_dv_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
- V6_vdmpybus_dv_acc_enc;
-defm V6_vdmpyhsuisat_acc :
- T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">,
- V6_vdmpyhsuisat_acc_enc;
-defm V6_vdmpyhisat_acc :
- T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
- V6_vdmpyhisat_acc_enc;
-defm V6_vdmpyhb_dv_acc :
- T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">,
- V6_vdmpyhb_dv_acc_enc;
-defm V6_vmpybus_acc :
- T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">,
- V6_vmpybus_acc_enc;
-defm V6_vmpabus_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">,
- V6_vmpabus_acc_enc;
-defm V6_vmpahb_acc :
- T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">,
- V6_vmpahb_acc_enc;
-defm V6_vmpyhsat_acc :
- T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">,
- V6_vmpyhsat_acc_enc;
-defm V6_vmpyuh_acc :
- T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
- V6_vmpyuh_acc_enc;
-defm V6_vmpyiwb_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">,
- V6_vmpyiwb_acc_enc;
-defm V6_vdsaduh_acc :
- T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">,
- V6_vdsaduh_acc_enc;
-defm V6_vmpyihb_acc :
- T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">,
- V6_vmpyihb_acc_enc;
-defm V6_vmpyub_acc :
- T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
- V6_vmpyub_acc_enc;
-
-let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
-defm V6_vdmpyhsusat_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">,
- V6_vdmpyhsusat_acc_enc;
-defm V6_vdmpyhsat_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
- V6_vdmpyhsat_acc_enc;
-defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR
- <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vaslw_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc;
-defm V6_vasrw_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc;
-}
-
-defm V6_vdmpyhvsat_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
- V6_vdmpyhvsat_acc_enc;
-defm V6_vmpybusv_acc :
- T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">,
- V6_vmpybusv_acc_enc;
-defm V6_vmpybv_acc :
- T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc;
-defm V6_vmpyhus_acc :
- T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc;
-defm V6_vmpyhv_acc :
- T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc;
-defm V6_vmpyiewh_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">,
- V6_vmpyiewh_acc_enc;
-defm V6_vmpyiewuh_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">,
- V6_vmpyiewuh_acc_enc;
-defm V6_vmpyih_acc :
- T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc;
-defm V6_vmpyowh_rnd_sacc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">,
- V6_vmpyowh_rnd_sacc_enc;
-defm V6_vmpyowh_sacc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">,
- V6_vmpyowh_sacc_enc;
-defm V6_vmpyubv_acc :
- T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
- V6_vmpyubv_acc_enc;
-defm V6_vmpyuhv_acc :
- T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
- V6_vmpyuhv_acc_enc;
-defm V6_vrmpybusv_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">,
- V6_vrmpybusv_acc_enc;
-defm V6_vrmpybv_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc;
-defm V6_vrmpyubv_acc :
- T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
- V6_vrmpyubv_acc_enc;
-
-
-class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString,
- [], "$dst = $_src_" > {
- let Itinerary = CVI_VA;
- let Type = TypeCVI_VA;
-}
-
-multiclass T_HVX_vcmp <string asmString> {
- def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>;
-}
-
-defm V6_veqb_and :
- T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc;
-defm V6_veqh_and :
- T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc;
-defm V6_veqw_and :
- T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc;
-defm V6_vgtb_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc;
-defm V6_vgth_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc;
-defm V6_vgtw_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc;
-defm V6_vgtub_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc;
-defm V6_vgtuh_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc;
-defm V6_vgtuw_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc;
-defm V6_veqb_or :
- T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc;
-defm V6_veqh_or :
- T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc;
-defm V6_veqw_or :
- T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc;
-defm V6_vgtb_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc;
-defm V6_vgth_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc;
-defm V6_vgtw_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc;
-defm V6_vgtub_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc;
-defm V6_vgtuh_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc;
-defm V6_vgtuw_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc;
-defm V6_veqb_xor :
- T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc;
-defm V6_veqh_xor :
- T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc;
-defm V6_veqw_xor :
- T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc;
-defm V6_vgtb_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc;
-defm V6_vgth_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc;
-defm V6_vgtw_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc;
-defm V6_vgtub_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc;
-defm V6_vgtuh_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc;
-defm V6_vgtuw_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc;
-
-defm V6_vminub :
- T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc;
-defm V6_vminuh :
- T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc;
-defm V6_vminh :
- T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc;
-defm V6_vminw :
- T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc;
-defm V6_vmaxub :
- T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc;
-defm V6_vmaxuh :
- T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc;
-defm V6_vmaxh :
- T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc;
-defm V6_vmaxw :
- T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc;
-defm V6_vshuffeb :
- T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc;
-defm V6_vshuffob :
- T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc;
-defm V6_vshufeh :
- T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc;
-defm V6_vshufoh :
- T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc;
-
-let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
-defm V6_vmpyowh_rnd :
- T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">,
- V6_vmpyowh_rnd_enc;
-defm V6_vmpyiewuh :
- T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc;
-defm V6_vmpyewuh :
- T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc;
-defm V6_vmpyowh :
- T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc;
-defm V6_vmpyiowh :
- T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc;
-}
-let Itinerary = CVI_VX, Type = TypeCVI_VX in
-defm V6_vmpyieoh :
- T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc;
-
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in {
-defm V6_vshufoeh :
- T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc;
-defm V6_vshufoeb :
- T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc;
-}
-
-let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
-defm V6_vcombine :
- T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc;
-
-let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in {
-defm V6_vsathub :
- T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc;
-defm V6_vsatwh :
- T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vroundwh :
- T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc;
-defm V6_vroundwuh :
- T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc;
-defm V6_vroundhb :
- T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc;
-defm V6_vroundhub :
- T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc;
-defm V6_vasrwv :
- T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc;
-defm V6_vlsrwv :
- T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc;
-defm V6_vlsrhv :
- T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc;
-defm V6_vasrhv :
- T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc;
-defm V6_vaslwv :
- T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc;
-defm V6_vaslhv :
- T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc;
-}
-
-defm V6_vaddb :
- T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc;
-defm V6_vaddh :
- T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc;
-
-let Itinerary = CVI_VP, Type = TypeCVI_VP in {
-defm V6_vdelta :
- T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc;
-defm V6_vrdelta :
- T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc;
-defm V6_vdealb4w :
- T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc;
-defm V6_vpackeb :
- T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc;
-defm V6_vpackeh :
- T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc;
-defm V6_vpackhub_sat :
- T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc;
-defm V6_vpackhb_sat :
- T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc;
-defm V6_vpackwuh_sat :
- T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc;
-defm V6_vpackwh_sat :
- T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc;
-defm V6_vpackob :
- T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc;
-defm V6_vpackoh :
- T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2>
- : CVI_VA_Resource1 <(outs RC2:$dst),
- (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString,
- [], "$dst = $_src_" > {
- let Itinerary = CVI_VA;
- let Type = TypeCVI_VA;
-}
-
-multiclass T_HVX_condALU <string asmString> {
- def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>;
-}
-
-defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">,
- V6_vaddbq_enc;
-defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">,
- V6_vaddhq_enc;
-defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">,
- V6_vaddwq_enc;
-defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">,
- V6_vsubbq_enc;
-defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">,
- V6_vsubhq_enc;
-defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">,
- V6_vsubwq_enc;
-defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">,
- V6_vaddbnq_enc;
-defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">,
- V6_vaddhnq_enc;
-defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">,
- V6_vaddwnq_enc;
-defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">,
- V6_vsubbnq_enc;
-defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">,
- V6_vsubhnq_enc;
-defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">,
- V6_vsubwnq_enc;
-
-let hasNewValue = 1 in
-class T_HVX_alu_2op <string asmString, InstrItinClass itin,
- RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1),
- asmString >{
- let Itinerary = itin;
- let Type = !cast<IType>("Type"#itin);
-}
-
-multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout,
- RegisterClass RCin, InstrItinClass itin> {
- def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_alu_2op <asmString, itin,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-let hasNewValue = 1 in
-multiclass T_HVX_alu_2op_VV <string asmString>:
- T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>;
-
-multiclass T_HVX_alu_2op_WV <string asmString>:
- T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>;
-
-
-defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">,
- V6_vabsh_enc;
-defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">,
- V6_vabsw_enc;
-defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">,
- V6_vabsh_sat_enc;
-defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">,
- V6_vabsw_sat_enc;
-defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">,
- V6_vnot_enc;
-defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">,
- V6_vassign_enc;
-
-defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">,
- V6_vzb_enc;
-defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">,
- V6_vzh_enc;
-defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">,
- V6_vsb_enc;
-defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">,
- V6_vsh_enc;
-
-let Itinerary = CVI_VP, Type = TypeCVI_VP in {
-defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">,
- V6_vdealh_enc;
-defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">,
- V6_vdealb_enc;
-defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">,
- V6_vshuffh_enc;
-defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">,
- V6_vshuffb_enc;
-}
-
-let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in {
-defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">,
- V6_vunpackub_enc;
-defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">,
- V6_vunpackuh_enc;
-defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">,
- V6_vunpackb_enc;
-defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">,
- V6_vunpackh_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">,
- V6_vcl0w_enc;
-defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">,
- V6_vcl0h_enc;
-defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">,
- V6_vnormamtw_enc;
-defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">,
- V6_vnormamth_enc;
-defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">,
- V6_vpopcounth_enc;
-}
-
-let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG,
- Type = TypeCVI_VX_DV in
-class T_HVX_vmpyacc2 <string asmString, RegisterClass RC>
- : CVI_VA_Resource1 <(outs RC:$dst),
- (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1_0Imm:$src3),
- asmString, [], "$dst = $_src_" > ;
-
-
-multiclass T_HVX_vmpyacc2 <string asmString> {
- def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>;
-}
-
-defm V6_vrmpybusi_acc :
- T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">,
- V6_vrmpybusi_acc_enc;
-defm V6_vrsadubi_acc :
- T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">,
- V6_vrsadubi_acc_enc;
-defm V6_vrmpyubi_acc :
- T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">,
- V6_vrmpyubi_acc_enc;
-
-
-let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in
-class T_HVX_vmpy2 <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1_0Imm:$src3),
- asmString>;
-
-
-multiclass T_HVX_vmpy2 <string asmString> {
- def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>;
-}
-
-defm V6_vrmpybusi :
- T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc;
-defm V6_vrsadubi :
- T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc;
-defm V6_vrmpyubi :
- T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc;
-
-
-let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS,
- hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in
-class T_HVX_perm <string asmString, RegisterClass RC>
- : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_),
- (ins RC:$src1, RC:$src2, IntRegs:$src3),
- asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >;
-
-multiclass T_HVX_perm <string asmString> {
- def NAME : T_HVX_perm <asmString, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>;
-}
-
-let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in {
- defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc;
- defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc;
-}
-
-// Conditional vector move.
-let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_HVX_cmov <bit isPredNot, RegisterClass RC>
- : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2),
- "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> {
- let isPredicatedFalse = isPredNot;
-}
-
-multiclass T_HVX_cmov <bit isPredNot = 0> {
- def NAME : T_HVX_cmov <isPredNot, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>;
-}
-
-defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc;
-defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc;
-
-// Conditional vector combine.
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1,
- hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 < (outs RCout:$dst),
- (ins PredRegs:$src1, RCin:$src2, RCin:$src3),
- "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> {
- let isPredicatedFalse = isPredNot;
-}
-
-multiclass T_HVX_ccombine <bit isPredNot = 0> {
- def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>;
-}
-
-defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc;
-defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc;
-
-let hasNewValue = 1 in
-class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_DV_Resource1<(outs RCout:$dst),
- (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
- asmString >;
-
-multiclass T_HVX_shift <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_HVX_shift <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_shift_VV <string asmString>:
- T_HVX_shift <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_HVX_shift_WV <string asmString>:
- T_HVX_shift <asmString, VecDblRegs, VectorRegs>;
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in {
-defm V6_valignb :
- T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc;
-defm V6_vlalignb :
- T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vasrwh :
- T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc;
-defm V6_vasrwhsat :
- T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">,
- V6_vasrwhsat_enc;
-defm V6_vasrwhrndsat :
- T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">,
- V6_vasrwhrndsat_enc;
-defm V6_vasrwuhsat :
- T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">,
- V6_vasrwuhsat_enc;
-defm V6_vasrhubsat :
- T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">,
- V6_vasrhubsat_enc;
-defm V6_vasrhubrndsat :
- T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">,
- V6_vasrhubrndsat_enc;
-defm V6_vasrhbrndsat :
- T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">,
- V6_vasrhbrndsat_enc;
-}
-
-// Assembler mapped -- alias?
-//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc;
-let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in {
-defm V6_vshuffvdd :
- T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc;
-defm V6_vdealvdd :
- T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc;
-}
-
-let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in
-class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1),
- asmString, [], "$dst = $_src_">;
-
-multiclass T_HVX_unpack <string asmString> {
- def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>;
-}
-
-defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc;
-defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc;
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1,
- hasSideEffects = 0 in
-class T_HVX_valign <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3_0Imm:$src3),
- asmString>;
-
-multiclass T_HVX_valign <string asmString> {
- def NAME : T_HVX_valign <asmString, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>;
-}
-
-defm V6_valignbi :
- T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc;
-defm V6_vlalignbi :
- T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc;
-
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
-class T_HVX_predAlu <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2),
- asmString>;
-
-multiclass T_HVX_predAlu <string asmString> {
- def NAME : T_HVX_predAlu <asmString, VecPredRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>;
-}
-
-defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc;
-defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc;
-defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc;
-defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc;
-defm V6_pred_and_n :
- T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc;
-
-let Itinerary = CVI_VA, Type = TypeCVI_VA in
-class T_HVX_prednot <RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1),
- "$dst = not($src1)">, V6_pred_not_enc;
-
-def V6_pred_not : T_HVX_prednot <VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>;
-
-let Itinerary = CVI_VA, Type = TypeCVI_VA in
-class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
- asmString >;
-
-multiclass T_HVX_vcmp2 <string asmString> {
- def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>;
-}
-
-defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc;
-defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc;
-defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc;
-defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc;
-defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc;
-defm V6_vgtw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc;
-defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc;
-defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc;
-defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc;
-
-let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in
-class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
- "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc;
-
-def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>;
-
-let isAccumulator = 1 in
-class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
- "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc;
-
-def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>;
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst),
- (ins RCin:$src1, IntRegs:$src2),
- "$dst = vand($src1,$src2)" >, V6_vandqrt_enc;
-
-def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>;
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_V6_lvsplatw <RegisterClass RC>
- : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1),
- "$dst = vsplat($src1)" >, V6_lvsplatw_enc;
-
-def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>;
-
-
-let hasNewValue = 1 in
-class T_V6_vinsertwr <RegisterClass RC>
- : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1),
- "$dst.w = vinsert($src1)", [], "$dst = $_src_">,
- V6_vinsertwr_enc;
-
-def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>;
-
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in
-class T_V6_pred_scalar2 <RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1),
- "$dst = vsetq($src1)">, V6_pred_scalar2_enc;
-
-def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>;
-
-class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
- "$dst = vand($src1,$src2)">, V6_vandvrt_enc;
-
-def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>;
-
-let validSubTargets = HasV60SubT in
-class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp >
- : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>;
-
-class T_HVX_rol_R <string asmString>
- : T_HVX_rol <asmString, IntRegs, u5_0Imm>;
-class T_HVX_rol_P <string asmString>
- : T_HVX_rol <asmString, DoubleRegs, u6_0Imm>;
-
-def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc;
-let hasNewValue = 1, opNewValue = 0 in
-def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc;
-
-let validSubTargets = HasV60SubT in
-class T_HVX_rol_acc <string asmString, RegisterClass RC, Operand ImmOp>
- : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2),
- asmString, [], "$dst = $_src_" >;
-
-class T_HVX_rol_acc_P <string asmString>
- : T_HVX_rol_acc <asmString, DoubleRegs, u6_0Imm>;
-
-class T_HVX_rol_acc_R <string asmString>
- : T_HVX_rol_acc <asmString, IntRegs, u5_0Imm>;
-
-def S6_rol_i_p_nac :
- T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc;
-def S6_rol_i_p_acc :
- T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc;
-def S6_rol_i_p_and :
- T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc;
-def S6_rol_i_p_or :
- T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc;
-def S6_rol_i_p_xacc :
- T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc;
-
-let hasNewValue = 1, opNewValue = 0 in {
-def S6_rol_i_r_nac :
- T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc;
-def S6_rol_i_r_acc :
- T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc;
-def S6_rol_i_r_and :
- T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc;
-def S6_rol_i_r_or :
- T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc;
-def S6_rol_i_r_xacc :
- T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc;
-}
-
-let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in
-class T_V6_extractw <RegisterClass RC>
- : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2),
- "$dst = vextract($src1,$src2)">, V6_extractw_enc;
-
-def V6_extractw : T_V6_extractw <VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_extractw_128B : T_V6_extractw <VectorRegs128B>;
-
-let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in
-class T_sys0op <string asmString>
- : ST1Inst <(outs), (ins), asmString>;
-
-let isSolo = 1, validSubTargets = HasV55SubT in {
-def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc;
-def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc;
-def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc;
-}
-
-class T_sys1op <string asmString, RegisterClass RC>
- : ST1Inst <(outs), (ins RC:$src1), asmString>;
-
-class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>;
-class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>;
-
-let isSoloAX = 1, validSubTargets = HasV55SubT in
-def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc;
-
-let isSolo = 1, validSubTargets = HasV60SubT in {
-def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc;
-def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc;
-}
-
-let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1,
- validSubTargets = HasV55SubT in
-def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1),
- "$dst = l2locka($src1)">, Y5_l2locka_enc;
-
-// Not defined on the 'etc' side. Why?
-// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc;
-
-let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1,
- hasSideEffects = 0,
-validSubTargets = HasV55SubT in
-def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2),
- (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst1,$dst2 = vacsh($src1,$src2)", [],
- "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc;
-
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1,
- hasSideEffects = 0 in
-class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1,
- RegisterClass RCin2>
- : CVI_VA_Resource1<(outs RCout:$dst),
- (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>;
-
-multiclass T_HVX_alu2 <string asmString, RegisterClass RC > {
- def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"),
- VecPredRegs128B, VectorRegs128B>;
-}
-
-multiclass T_HVX_alu2_V <string asmString> :
- T_HVX_alu2 <asmString, VectorRegs>;
-
-multiclass T_HVX_alu2_W <string asmString> :
- T_HVX_alu2 <asmString, VecDblRegs>;
-
-defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc;
-
-let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1,
- hasSideEffects = 0 in
-defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc;
-
-class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1<(outs RCout:$dst),
- (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>;
-
-multiclass T_HVX_vlutb <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_HVX_vlutb <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_vlutb_V <string asmString> :
- T_HVX_vlutb <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_HVX_vlutb_W <string asmString> :
- T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>;
-
-let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in
-class T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
- RegisterClass RCin>
- : CVI_VA_Resource1<(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
- asmString, [], "$dst = $_src_">;
-
-multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vlutb_acc<asmString,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_vlutb_acc_V <string asmString> :
- T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_HVX_vlutb_acc_W <string asmString> :
- T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>;
-
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in
-defm V6_vlutvvb:
- T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc;
-
-let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in
-defm V6_vlutvwh:
- T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc;
-
-let hasNewValue = 1 in {
- defm V6_vlutvvb_oracc:
- T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">,
- V6_vlutvvb_oracc_enc;
- defm V6_vlutvwh_oracc:
- T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">,
- V6_vlutvwh_oracc_enc;
-}
-
-// This is a fake instruction; should it be defined at all?
-def S2_cabacencbin
- : SInst2<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
- "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc;
-
-// Vhist instructions
-def V6_vhistq
- : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1),
- "vhist($src1)">, V6_vhistq_enc;
-
-def V6_vhist
- : CVI_HIST_Resource1 <(outs), (ins),
- "vhist" >, V6_vhist_enc;
-
-
-let isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
- def V6_vd0: CVI_VA_Resource<(outs VectorRegs:$dst), (ins), "$dst = #0", []>;
- def V6_vd0_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst), (ins),
- "$dst = #0", []>;
-
- def V6_vassignp: CVI_VA_Resource<(outs VecDblRegs:$dst),
- (ins VecDblRegs:$src), "", []>;
- def V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst),
- (ins VecDblRegs128B:$src), "", []>;
-
- def V6_lo: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1),
- "", []>;
- def V6_lo_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst),
- (ins VecDblRegs128B:$src1), "", []>;
-
- def V6_hi: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1),
- "", []>;
- def V6_hi_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst),
- (ins VecDblRegs128B:$src1), "", []>;
-}
diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td
deleted file mode 100644
index e3520bd6e515..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoVector.td
+++ /dev/null
@@ -1,69 +0,0 @@
-//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon Vector instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// Vector shift support. Vector shifting in Hexagon is rather different
-// from LLVM's internal representation.
-// LLVM assumes all shifts (in the vector case) have the form
-// <VT> = SHL/SRA/SRL <VT> by <VT>
-// while Hexagon has the following format:
-// <VT> = SHL/SRA/SRL <VT> by <IT/i32>
-// As a result, special care is needed to guarantee correctness and
-// performance.
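The mismatch described above is easiest to see with a scalar sketch. The two helpers below are illustrative only and are not part of the backend; they contrast LLVM's generic per-lane shift amounts with the single scalar amount that the definitions below take.

    #include <array>
    #include <cstdint>

    // LLVM's generic form: every lane may be shifted by its own amount.
    std::array<int16_t, 4> shlPerLane(std::array<int16_t, 4> V,
                                      std::array<int16_t, 4> Amt) {
      for (unsigned I = 0; I != 4; ++I)
        V[I] = int16_t(V[I] << Amt[I]);
      return V;
    }

    // Hexagon's form (e.g. vaslh): one i32 amount applied to all lanes, which
    // is why the instructions below take a scalar register or immediate.
    std::array<int16_t, 4> shlAllLanes(std::array<int16_t, 4> V, uint32_t Amt) {
      for (int16_t &Lane : V)
        Lane = int16_t(Lane << Amt);
      return V;
    }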
-class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u4_0Imm, []> {
- bits<4> src2;
- let Inst{11-8} = src2;
-}
-
-class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u5_0Imm, []> {
- bits<5> src2;
- let Inst{12-8} = src2;
-}
-
-def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
-def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
-def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
-
-def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
-def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
-def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
-
-// Vector shift words by register
-def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
-def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
-def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
-def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;
-
-// Vector shift halfwords by register
-def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
-def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
-def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
-def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
-
-
-// Hexagon doesn't have a vector multiply with C semantics.
-// Instead, generate a pseudo instruction that gets expanded into two
-// scalar MPYI instructions.
-// The expansion is done by ExpandPostRAPseudos.
-let isPseudo = 1 in
-def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>;
-
-let isPseudo = 1 in
-def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [],
- "$Rd = $Rx">;
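As a rough sketch of what the two pseudos above compute after expansion (an illustration only, not the actual ExpandPostRAPseudos code, and assuming the usual truncating 32x32->32 semantics of MPYI): each half of the 64-bit register pair is multiplied independently, and the accumulating form adds the product into the existing halves.

    #include <cstdint>

    // PS_vmulw: elementwise 32x32->32 multiply on the two halves of a pair.
    uint64_t vmulw(uint64_t Rs, uint64_t Rt) {
      uint32_t Lo = uint32_t(Rs) * uint32_t(Rt);
      uint32_t Hi = uint32_t(Rs >> 32) * uint32_t(Rt >> 32);
      return (uint64_t(Hi) << 32) | Lo;
    }

    // PS_vmulw_acc: the same multiply, accumulated into the destination pair.
    uint64_t vmulwAcc(uint64_t Rx, uint64_t Rs, uint64_t Rt) {
      uint32_t Lo = uint32_t(Rx) + uint32_t(Rs) * uint32_t(Rt);
      uint32_t Hi = uint32_t(Rx >> 32) + uint32_t(Rs >> 32) * uint32_t(Rt >> 32);
      return (uint64_t(Hi) << 32) | Lo;
    }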
-
-
-
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index d4f303bf6ff0..c611857ec26a 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1347,6 +1347,25 @@ def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred, I32>;
def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred, I64>;
def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
+multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
+ def : Pat<(IntID VecPredRegs:$src1, IntRegs:$src2, VectorRegs:$src3),
+ (MI VecPredRegs:$src1, IntRegs:$src2, #0, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2, #0,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : MaskedStore <V6_vS32b_qpred_ai, int_hexagon_V6_vmaskedstoreq>;
+defm : MaskedStore <V6_vS32b_nqpred_ai, int_hexagon_V6_vmaskedstorenq>;
+defm : MaskedStore <V6_vS32b_nt_qpred_ai, int_hexagon_V6_vmaskedstorentq>;
+defm : MaskedStore <V6_vS32b_nt_nqpred_ai, int_hexagon_V6_vmaskedstorentnq>;
+
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index a45e1c9d7be4..f438b3e0368f 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -790,7 +790,7 @@ def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>;
defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>;
defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>;
-def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
+//def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
def: Pat<(v64i16 (trunc v64i32:$Vdd)),
(v64i16 (V6_vpackwh_sat_128B
diff --git a/lib/Target/Hexagon/HexagonIsetDx.td b/lib/Target/Hexagon/HexagonIsetDx.td
deleted file mode 100644
index ebedf2cbaf17..000000000000
--- a/lib/Target/Hexagon/HexagonIsetDx.td
+++ /dev/null
@@ -1,728 +0,0 @@
-//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon duplex instructions.
-//
-//===----------------------------------------------------------------------===//
-
-// SA1_combine1i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine1i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#1, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b01;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SL2_jumpr31_f: Indirect conditional jump if false.
-// SL2_jumpr31_f -> SL2_jumpr31_fnew
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_f: SUBInst <
- (outs ),
- (ins ),
- "if (!p0) jumpr r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b101;
- }
-
-// SL2_deallocframe: Deallocate stack frame.
-let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
-def SL2_deallocframe: SUBInst <
- (outs ),
- (ins ),
- "deallocframe"> {
- let Inst{12-6} = 0b1111100;
- let Inst{2} = 0b0;
- }
-
-// SL2_return_f: Deallocate stack frame and return.
-// SL2_return_f -> SL2_return_fnew
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_f: SUBInst <
- (outs ),
- (ins ),
- "if (!p0) dealloc_return"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b101;
- }
-
-// SA1_combine3i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine3i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#3, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b11;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SS2_storebi0: Store byte.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def SS2_storebi0: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0),
- "memb($Rs + #$u4_0)=#0"> {
- bits<4> Rs;
- bits<4> u4_0;
-
- let Inst{12-8} = 0b10010;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_0;
- }
-
-// SA1_clrtnew: Clear if true.
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrtnew: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if ($Pu.new) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b100;
- let Inst{3-0} = Rd;
- }
-
-// SL2_loadruh_io: Load half.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadruh_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u3_1Imm:$u3_1),
- "$Rd = memuh($Rs + #$u3_1)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<4> u3_1;
-
- let Inst{12-11} = 0b01;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_1{3-1};
- }
-
-// SL2_jumpr31_tnew: Indirect conditional jump if true.
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_tnew: SUBInst <
- (outs ),
- (ins ),
- "if (p0.new) jumpr:nt r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b110;
- }
-
-// SA1_addi: Add.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in
-def SA1_addi: SUBInst <
- (outs IntRegs:$Rx),
- (ins IntRegs:$_src_, s7_0Ext:$s7),
- "$Rx = add($_src_, #$s7)" ,
- [] ,
- "$_src_ = $Rx"> {
- bits<4> Rx;
- bits<7> s7;
-
- let Inst{12-11} = 0b00;
- let Inst{3-0} = Rx;
- let Inst{10-4} = s7;
- }
-
-// SL1_loadrub_io: Load byte.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
-def SL1_loadrub_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0),
- "$Rd = memub($Rs + #$u4_0)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<4> u4_0;
-
- let Inst{12} = 0b1;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_0;
- }
-
-// SL1_loadri_io: Load word.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL1_loadri_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2),
- "$Rd = memw($Rs + #$u4_2)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<6> u4_2;
-
- let Inst{12} = 0b0;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_2{5-2};
- }
-
-// SA1_cmpeqi: Compare immediate.
-let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_cmpeqi: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u2_0Imm:$u2),
- "p0 = cmp.eq($Rs, #$u2)"> {
- bits<4> Rs;
- bits<2> u2;
-
- let Inst{12-8} = 0b11001;
- let Inst{7-4} = Rs;
- let Inst{1-0} = u2;
- }
-
-// SA1_combinerz: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combinerz: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins IntRegs:$Rs),
- "$Rdd = combine($Rs, #0)"> {
- bits<3> Rdd;
- bits<4> Rs;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b1;
- let Inst{3} = 0b1;
- let Inst{2-0} = Rdd;
- let Inst{7-4} = Rs;
- }
-
-// SL2_return_t: Deallocate stack frame and return.
-// SL2_return_t -> SL2_return_tnew
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_t: SUBInst <
- (outs ),
- (ins ),
- "if (p0) dealloc_return"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b100;
- }
-
-// SS2_allocframe: Allocate stack frame.
-let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
-def SS2_allocframe: SUBInst <
- (outs ),
- (ins u5_3Imm:$u5_3),
- "allocframe(#$u5_3)"> {
- bits<8> u5_3;
-
- let Inst{12-9} = 0b1110;
- let Inst{8-4} = u5_3{7-3};
- }
-
-// SS2_storeh_io: Store half.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in
-def SS2_storeh_io: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt),
- "memh($Rs + #$u3_1) = $Rt"> {
- bits<4> Rs;
- bits<4> u3_1;
- bits<4> Rt;
-
- let Inst{12-11} = 0b00;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_1{3-1};
- let Inst{3-0} = Rt;
- }
-
-// SS2_storewi0: Store word.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS2_storewi0: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2),
- "memw($Rs + #$u4_2)=#0"> {
- bits<4> Rs;
- bits<6> u4_2;
-
- let Inst{12-8} = 0b10000;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_2{5-2};
- }
-
-// SS2_storewi1: Store word.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS2_storewi1: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2),
- "memw($Rs + #$u4_2)=#1"> {
- bits<4> Rs;
- bits<6> u4_2;
-
- let Inst{12-8} = 0b10001;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_2{5-2};
- }
-
-// SL2_jumpr31: Indirect unconditional jump.
-let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31: SUBInst <
- (outs ),
- (ins ),
- "jumpr r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2} = 0b0;
- }
-
-// SA1_combinezr: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combinezr: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins IntRegs:$Rs),
- "$Rdd = combine(#0, $Rs)"> {
- bits<3> Rdd;
- bits<4> Rs;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b1;
- let Inst{3} = 0b0;
- let Inst{2-0} = Rdd;
- let Inst{7-4} = Rs;
- }
-
-// SL2_loadrh_io: Load half.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadrh_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u3_1Imm:$u3_1),
- "$Rd = memh($Rs + #$u3_1)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<4> u3_1;
-
- let Inst{12-11} = 0b00;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_1{3-1};
- }
-
-// SA1_addrx: Add.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_addrx: SUBInst <
- (outs IntRegs:$Rx),
- (ins IntRegs:$_src_, IntRegs:$Rs),
- "$Rx = add($_src_, $Rs)" ,
- [] ,
- "$_src_ = $Rx"> {
- bits<4> Rx;
- bits<4> Rs;
-
- let Inst{12-8} = 0b11000;
- let Inst{3-0} = Rx;
- let Inst{7-4} = Rs;
- }
-
-// SA1_setin1: Set to -1.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_setin1: SUBInst <
- (outs IntRegs:$Rd),
- (ins ),
- "$Rd = #{-1}"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6} = 0b0;
- let Inst{3-0} = Rd;
- }
-
-// SA1_sxth: Sxth.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_sxth: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = sxth($Rs)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10100;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_combine0i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine0i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#0, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b00;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SA1_combine2i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine2i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#2, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b10;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SA1_sxtb: Sxtb.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_sxtb: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = sxtb($Rs)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10101;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_clrf: Clear if false.
-// SA1_clrf -> SA1_clrfnew
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrf: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if (!$Pu) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b111;
- let Inst{3-0} = Rd;
- }
-
-// SL2_loadrb_io: Load byte.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadrb_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u3_0Imm:$u3_0),
- "$Rd = memb($Rs + #$u3_0)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<3> u3_0;
-
- let Inst{12-11} = 0b10;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_0;
- }
-
-// SA1_tfr: Tfr.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_tfr: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = $Rs"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10000;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SL2_loadrd_sp: Load dword.
-let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
-def SL2_loadrd_sp: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u5_3Imm:$u5_3),
- "$Rdd = memd(r29 + #$u5_3)"> {
- bits<3> Rdd;
- bits<8> u5_3;
-
- let Inst{12-8} = 0b11110;
- let Inst{2-0} = Rdd;
- let Inst{7-3} = u5_3{7-3};
- }
-
-// SA1_and1: And #1.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_and1: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = and($Rs, #1)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10010;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SS2_storebi1: Store byte.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def SS2_storebi1: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0),
- "memb($Rs + #$u4_0)=#1"> {
- bits<4> Rs;
- bits<4> u4_0;
-
- let Inst{12-8} = 0b10011;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_0;
- }
-
-// SA1_inc: Inc.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_inc: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = add($Rs, #1)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10001;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SS2_stored_sp: Store dword.
-let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
-def SS2_stored_sp: SUBInst <
- (outs ),
- (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt),
- "memd(r29 + #$s6_3) = $Rtt"> {
- bits<9> s6_3;
- bits<3> Rtt;
-
- let Inst{12-9} = 0b0101;
- let Inst{8-3} = s6_3{8-3};
- let Inst{2-0} = Rtt;
- }
-
-// SS2_storew_sp: Store word.
-let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS2_storew_sp: SUBInst <
- (outs ),
- (ins u5_2Imm:$u5_2, IntRegs:$Rt),
- "memw(r29 + #$u5_2) = $Rt"> {
- bits<7> u5_2;
- bits<4> Rt;
-
- let Inst{12-9} = 0b0100;
- let Inst{8-4} = u5_2{6-2};
- let Inst{3-0} = Rt;
- }
-
-// SL2_jumpr31_fnew: Indirect conditional jump if false.
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_fnew: SUBInst <
- (outs ),
- (ins ),
- "if (!p0.new) jumpr:nt r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b111;
- }
-
-// SA1_clrt: Clear if true.
-// SA1_clrt -> SA1_clrtnew
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrt: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if ($Pu) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b110;
- let Inst{3-0} = Rd;
- }
-
-// SL2_return: Deallocate stack frame and return.
-let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return: SUBInst <
- (outs ),
- (ins ),
- "dealloc_return"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2} = 0b0;
- }
-
-// SA1_dec: Dec.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_dec: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = add($Rs,#{-1})"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10011;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_seti: Set immed.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in
-def SA1_seti: SUBInst <
- (outs IntRegs:$Rd),
- (ins u6_0Ext:$u6),
- "$Rd = #$u6"> {
- bits<4> Rd;
- bits<6> u6;
-
- let Inst{12-10} = 0b010;
- let Inst{3-0} = Rd;
- let Inst{9-4} = u6;
- }
-
-// SL2_jumpr31_t: Indirect conditional jump if true.
-// SL2_jumpr31_t -> SL2_jumpr31_tnew
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_t: SUBInst <
- (outs ),
- (ins ),
- "if (p0) jumpr r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b100;
- }
-
-// SA1_clrfnew: Clear if false.
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrfnew: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if (!$Pu.new) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b101;
- let Inst{3-0} = Rd;
- }
-
-// SS1_storew_io: Store word.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS1_storew_io: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt),
- "memw($Rs + #$u4_2) = $Rt"> {
- bits<4> Rs;
- bits<6> u4_2;
- bits<4> Rt;
-
- let Inst{12} = 0b0;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_2{5-2};
- let Inst{3-0} = Rt;
- }
-
-// SA1_zxtb: Zxtb.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_zxtb: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = and($Rs, #255)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10111;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_addsp: Add.
-let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_addsp: SUBInst <
- (outs IntRegs:$Rd),
- (ins u6_2Imm:$u6_2),
- "$Rd = add(r29, #$u6_2)"> {
- bits<4> Rd;
- bits<8> u6_2;
-
- let Inst{12-10} = 0b011;
- let Inst{3-0} = Rd;
- let Inst{9-4} = u6_2{7-2};
- }
-
-// SL2_loadri_sp: Load word.
-let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadri_sp: SUBInst <
- (outs IntRegs:$Rd),
- (ins u5_2Imm:$u5_2),
- "$Rd = memw(r29 + #$u5_2)"> {
- bits<4> Rd;
- bits<7> u5_2;
-
- let Inst{12-9} = 0b1110;
- let Inst{3-0} = Rd;
- let Inst{8-4} = u5_2{6-2};
- }
-
-// SS1_storeb_io: Store byte.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def SS1_storeb_io: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt),
- "memb($Rs + #$u4_0) = $Rt"> {
- bits<4> Rs;
- bits<4> u4_0;
- bits<4> Rt;
-
- let Inst{12} = 0b1;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_0;
- let Inst{3-0} = Rt;
- }
-
-// SL2_return_tnew: Deallocate stack frame and return.
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_tnew: SUBInst <
- (outs ),
- (ins ),
- "if (p0.new) dealloc_return:nt"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b110;
- }
-
-// SL2_return_fnew: Deallocate stack frame and return.
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_fnew: SUBInst <
- (outs ),
- (ins ),
- "if (!p0.new) dealloc_return:nt"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b111;
- }
-
-// SA1_zxth: Zxth.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_zxth: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = zxth($Rs)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10110;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
new file mode 100644
index 000000000000..b5948475e1f7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -0,0 +1,2338 @@
+//===--- HexagonLoopIdiomRecognition.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-lir"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <array>
+#include <deque>
+#include <functional>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
+ cl::Hidden, cl::init(false),
+ cl::desc("Disable generation of memcpy in loop idiom recognition"));
+
+static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
+ cl::Hidden, cl::init(false),
+ cl::desc("Disable generation of memmove in loop idiom recognition"));
+
+static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
+ cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
+ "check guarding the memmove."));
+
+static cl::opt<unsigned> CompileTimeMemSizeThreshold(
+ "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
+ cl::desc("Threshold (in bytes) to perform the transformation, if the "
+ "runtime loop count (mem transfer size) is known at compile-time."));
+
+static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
+ cl::Hidden, cl::init(true),
+ cl::desc("Only enable generating memmove in non-nested loops"));
+
+cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
+ cl::Hidden, cl::init(false),
+ cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
+
+static const char *HexagonVolatileMemcpyName
+ = "hexagon_memcpy_forward_vp4cp4n2";
+
+
+namespace llvm {
+ void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
+ Pass *createHexagonLoopIdiomPass();
+}
+
+namespace {
+ class HexagonLoopIdiomRecognize : public LoopPass {
+ public:
+ static char ID;
+ explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
+ initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+ StringRef getPassName() const override {
+ return "Recognize Hexagon-specific loop idioms";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ private:
+ unsigned getStoreSizeInBytes(StoreInst *SI);
+ int getSCEVStride(const SCEVAddRecExpr *StoreEv);
+ bool isLegalStore(Loop *CurLoop, StoreInst *SI);
+ void collectStores(Loop *CurLoop, BasicBlock *BB,
+ SmallVectorImpl<StoreInst*> &Stores);
+ bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
+ bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
+ bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+ bool runOnCountableLoop(Loop *L);
+
+ AliasAnalysis *AA;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ LoopInfo *LF;
+ const TargetLibraryInfo *TLI;
+ ScalarEvolution *SE;
+ bool HasMemcpy, HasMemmove;
+ };
+}
+
+char HexagonLoopIdiomRecognize::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
+ "Recognize Hexagon-specific loop idioms", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
+ "Recognize Hexagon-specific loop idioms", false, false)
+
+
+namespace {
+ struct Simplifier {
+ typedef std::function<Value* (Instruction*, LLVMContext&)> Rule;
+
+ void addRule(const Rule &R) { Rules.push_back(R); }
+
+ private:
+ struct WorkListType {
+ WorkListType() = default;
+
+ void push_back(Value* V) {
+ // Do not push back duplicates.
+ if (!S.count(V)) { Q.push_back(V); S.insert(V); }
+ }
+ Value *pop_front_val() {
+ Value *V = Q.front(); Q.pop_front(); S.erase(V);
+ return V;
+ }
+ bool empty() const { return Q.empty(); }
+
+ private:
+ std::deque<Value*> Q;
+ std::set<Value*> S;
+ };
+
+ typedef std::set<Value*> ValueSetType;
+ std::vector<Rule> Rules;
+
+ public:
+ struct Context {
+ typedef DenseMap<Value*,Value*> ValueMapType;
+
+ Value *Root;
+ ValueSetType Used; // The set of all cloned values used by Root.
+ ValueSetType Clones; // The set of all cloned values.
+ LLVMContext &Ctx;
+
+ Context(Instruction *Exp)
+ : Ctx(Exp->getParent()->getParent()->getContext()) {
+ initialize(Exp);
+ }
+ ~Context() { cleanup(); }
+ void print(raw_ostream &OS, const Value *V) const;
+
+ Value *materialize(BasicBlock *B, BasicBlock::iterator At);
+
+ private:
+ void initialize(Instruction *Exp);
+ void cleanup();
+
+ template <typename FuncT> void traverse(Value *V, FuncT F);
+ void record(Value *V);
+ void use(Value *V);
+ void unuse(Value *V);
+
+ bool equal(const Instruction *I, const Instruction *J) const;
+ Value *find(Value *Tree, Value *Sub) const;
+ Value *subst(Value *Tree, Value *OldV, Value *NewV);
+ void replace(Value *OldV, Value *NewV);
+ void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
+
+ friend struct Simplifier;
+ };
+
+ Value *simplify(Context &C);
+ };
+
+ struct PE {
+ PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
+ const Simplifier::Context &C;
+ const Value *V;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED;
+ raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
+ P.C.print(OS, P.V ? P.V : P.C.Root);
+ return OS;
+ }
+}
+
+
+template <typename FuncT>
+void Simplifier::Context::traverse(Value *V, FuncT F) {
+ WorkListType Q;
+ Q.push_back(V);
+
+ while (!Q.empty()) {
+ Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
+ if (!U || U->getParent())
+ continue;
+ if (!F(U))
+ continue;
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+}
+
+
+void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
+ const auto *U = dyn_cast<const Instruction>(V);
+ if (!U) {
+ OS << V << '(' << *V << ')';
+ return;
+ }
+
+ if (U->getParent()) {
+ OS << U << '(';
+ U->printAsOperand(OS, true);
+ OS << ')';
+ return;
+ }
+
+ unsigned N = U->getNumOperands();
+ if (N != 0)
+ OS << U << '(';
+ OS << U->getOpcodeName();
+ for (const Value *Op : U->operands()) {
+ OS << ' ';
+ print(OS, Op);
+ }
+ if (N != 0)
+ OS << ')';
+}
+
+
+void Simplifier::Context::initialize(Instruction *Exp) {
+ // Perform a deep clone of the expression, set Root to the root
+ // of the clone, and build a map from the cloned values to the
+ // original ones.
+ ValueMapType M;
+ BasicBlock *Block = Exp->getParent();
+ WorkListType Q;
+ Q.push_back(Exp);
+
+ while (!Q.empty()) {
+ Value *V = Q.pop_front_val();
+ if (M.find(V) != M.end())
+ continue;
+ if (Instruction *U = dyn_cast<Instruction>(V)) {
+ if (isa<PHINode>(U) || U->getParent() != Block)
+ continue;
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ M.insert({U, U->clone()});
+ }
+ }
+
+ for (std::pair<Value*,Value*> P : M) {
+ Instruction *U = cast<Instruction>(P.second);
+ for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
+ auto F = M.find(U->getOperand(i));
+ if (F != M.end())
+ U->setOperand(i, F->second);
+ }
+ }
+
+ auto R = M.find(Exp);
+ assert(R != M.end());
+ Root = R->second;
+
+ record(Root);
+ use(Root);
+}
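A minimal stand-alone sketch of the same clone-then-remap idea, using a toy node type instead of LLVM instructions (all names here are made up for illustration): first every reachable node is copied, then each copy's operand pointers are redirected to the corresponding copies.

    #include <map>
    #include <memory>
    #include <vector>

    struct Node {
      std::vector<Node *> Ops;
    };

    // Deep-clone the tree rooted at Root; Storage owns the new nodes.
    Node *cloneTree(Node *Root, std::vector<std::unique_ptr<Node>> &Storage) {
      std::map<Node *, Node *> M;          // original -> clone
      std::vector<Node *> Work{Root};
      // First pass: copy every reachable node (operands still point at originals).
      while (!Work.empty()) {
        Node *N = Work.back();
        Work.pop_back();
        if (M.count(N))
          continue;
        Storage.push_back(std::make_unique<Node>(*N));
        M[N] = Storage.back().get();
        for (Node *Op : N->Ops)
          Work.push_back(Op);
      }
      // Second pass: point every clone's operands at the cloned counterparts.
      for (auto &P : M)
        for (Node *&Op : P.second->Ops)
          Op = M[Op];
      return M[Root];
    }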
+
+
+void Simplifier::Context::record(Value *V) {
+ auto Record = [this](Instruction *U) -> bool {
+ Clones.insert(U);
+ return true;
+ };
+ traverse(V, Record);
+}
+
+
+void Simplifier::Context::use(Value *V) {
+ auto Use = [this](Instruction *U) -> bool {
+ Used.insert(U);
+ return true;
+ };
+ traverse(V, Use);
+}
+
+
+void Simplifier::Context::unuse(Value *V) {
+ if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
+ return;
+
+ auto Unuse = [this](Instruction *U) -> bool {
+ if (!U->use_empty())
+ return false;
+ Used.erase(U);
+ return true;
+ };
+ traverse(V, Unuse);
+}
+
+
+Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
+ if (Tree == OldV)
+ return NewV;
+ if (OldV == NewV)
+ return Tree;
+
+ WorkListType Q;
+ Q.push_back(Tree);
+ while (!Q.empty()) {
+ Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
+ // If U is not an instruction, or it's not a clone, skip it.
+ if (!U || U->getParent())
+ continue;
+ for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
+ Value *Op = U->getOperand(i);
+ if (Op == OldV) {
+ U->setOperand(i, NewV);
+ unuse(OldV);
+ } else {
+ Q.push_back(Op);
+ }
+ }
+ }
+ return Tree;
+}
+
+
+void Simplifier::Context::replace(Value *OldV, Value *NewV) {
+ if (Root == OldV) {
+ Root = NewV;
+ use(Root);
+ return;
+ }
+
+ // NewV may be a complex tree that has just been created by one of the
+ // transformation rules. We need to make sure that it is commoned with
+ // the existing Root to the maximum extent possible.
+ // Identify all subtrees of NewV (including NewV itself) that have
+ // equivalent counterparts in Root, and replace those subtrees with
+ // these counterparts.
+ WorkListType Q;
+ Q.push_back(NewV);
+ while (!Q.empty()) {
+ Value *V = Q.pop_front_val();
+ Instruction *U = dyn_cast<Instruction>(V);
+ if (!U || U->getParent())
+ continue;
+ if (Value *DupV = find(Root, V)) {
+ if (DupV != V)
+ NewV = subst(NewV, V, DupV);
+ } else {
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+ }
+
+ // Now, simply replace OldV with NewV in Root.
+ Root = subst(Root, OldV, NewV);
+ use(Root);
+}
+
+
+void Simplifier::Context::cleanup() {
+ for (Value *V : Clones) {
+ Instruction *U = cast<Instruction>(V);
+ if (!U->getParent())
+ U->dropAllReferences();
+ }
+
+ for (Value *V : Clones) {
+ Instruction *U = cast<Instruction>(V);
+ if (!U->getParent())
+ delete U;
+ }
+}
+
+
+bool Simplifier::Context::equal(const Instruction *I,
+ const Instruction *J) const {
+ if (I == J)
+ return true;
+ if (!I->isSameOperationAs(J))
+ return false;
+ if (isa<PHINode>(I))
+ return I->isIdenticalTo(J);
+
+ for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
+ Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
+ if (OpI == OpJ)
+ continue;
+ auto *InI = dyn_cast<const Instruction>(OpI);
+ auto *InJ = dyn_cast<const Instruction>(OpJ);
+ if (InI && InJ) {
+ if (!equal(InI, InJ))
+ return false;
+ } else if (InI != InJ || !InI)
+ return false;
+ }
+ return true;
+}
+
+
+Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
+ Instruction *SubI = dyn_cast<Instruction>(Sub);
+ WorkListType Q;
+ Q.push_back(Tree);
+
+ while (!Q.empty()) {
+ Value *V = Q.pop_front_val();
+ if (V == Sub)
+ return V;
+ Instruction *U = dyn_cast<Instruction>(V);
+ if (!U || U->getParent())
+ continue;
+ if (SubI && equal(SubI, U))
+ return U;
+ assert(!isa<PHINode>(U));
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+ return nullptr;
+}
+
+
+void Simplifier::Context::link(Instruction *I, BasicBlock *B,
+ BasicBlock::iterator At) {
+ if (I->getParent())
+ return;
+
+ for (Value *Op : I->operands()) {
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ link(OpI, B, At);
+ }
+
+ B->getInstList().insert(At, I);
+}
+
+
+Value *Simplifier::Context::materialize(BasicBlock *B,
+ BasicBlock::iterator At) {
+ if (Instruction *RootI = dyn_cast<Instruction>(Root))
+ link(RootI, B, At);
+ return Root;
+}
+
+
+Value *Simplifier::simplify(Context &C) {
+ WorkListType Q;
+ Q.push_back(C.Root);
+ unsigned Count = 0;
+ const unsigned Limit = 100000;
+
+ while (!Q.empty()) {
+ if (Count++ >= Limit)
+ break;
+ Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
+ if (!U || U->getParent() || !C.Used.count(U))
+ continue;
+ bool Changed = false;
+ for (Rule &R : Rules) {
+ Value *W = R(U, C.Ctx);
+ if (!W)
+ continue;
+ Changed = true;
+ C.record(W);
+ C.replace(U, W);
+ Q.push_back(C.Root);
+ break;
+ }
+ if (!Changed) {
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+ }
+ assert(Count < Limit && "Infinite loop in HLIR/simplify?");
+ return C.Root;
+}
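For readers unfamiliar with this style, a rule is just a callable that either returns a replacement value or nullptr when it does not apply. A hypothetical example (not one of the rules this pass installs; the pass's own rules are added via setupSimplifier(), declared above) could look like the sketch below, assuming the constant APIs are reachable through the existing includes.

    // Hypothetical rule: rewrite "xor X, X" into the zero constant of X's type.
    static Value *foldXorSelf(Instruction *I, LLVMContext &) {
      using namespace llvm::PatternMatch;
      Value *X = nullptr, *Y = nullptr;
      if (match(I, m_Xor(m_Value(X), m_Value(Y))) && X == Y)
        return Constant::getNullValue(I->getType());
      return nullptr;   // Rule does not apply; simplify() tries the next one.
    }
    // Installed with:  Simp.addRule(foldXorSelf);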
+
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of PolynomialMultiplyRecognize
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class PolynomialMultiplyRecognize {
+ public:
+ explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
+ const DominatorTree &dt, const TargetLibraryInfo &tli,
+ ScalarEvolution &se)
+ : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
+
+ bool recognize();
+ private:
+ typedef SetVector<Value*> ValueSeq;
+
+ IntegerType *getPmpyType() const {
+ LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
+ return IntegerType::get(Ctx, 32);
+ }
+ bool isPromotableTo(Value *V, IntegerType *Ty);
+ void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
+ bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
+
+ Value *getCountIV(BasicBlock *BB);
+ bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
+ void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
+ ValueSeq &Late);
+ bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
+ bool commutesWithShift(Instruction *I);
+ bool highBitsAreZero(Value *V, unsigned IterCount);
+ bool keepsHighBitsZero(Value *V, unsigned IterCount);
+ bool isOperandShifted(Instruction *I, Value *Op);
+ bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
+ unsigned IterCount);
+ void cleanupLoopBody(BasicBlock *LoopB);
+
+ struct ParsedValues {
+ ParsedValues() : M(nullptr), P(nullptr), Q(nullptr), R(nullptr),
+ X(nullptr), Res(nullptr), IterCount(0), Left(false), Inv(false) {}
+ Value *M, *P, *Q, *R, *X;
+ Instruction *Res;
+ unsigned IterCount;
+ bool Left, Inv;
+ };
+
+ bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
+ bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
+ bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
+ Value *CIV, ParsedValues &PV, bool PreScan);
+ unsigned getInverseMxN(unsigned QP);
+ Value *generate(BasicBlock::iterator At, ParsedValues &PV);
+
+ void setupSimplifier();
+
+ Simplifier Simp;
+ Loop *CurLoop;
+ const DataLayout &DL;
+ const DominatorTree &DT;
+ const TargetLibraryInfo &TLI;
+ ScalarEvolution &SE;
+ };
+}
+
+
+Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (std::distance(PI, PE) != 2)
+ return nullptr;
+ BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
+
+ for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
+ auto *PN = cast<PHINode>(I);
+ Value *InitV = PN->getIncomingValueForBlock(PB);
+ if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
+ continue;
+ Value *IterV = PN->getIncomingValueForBlock(BB);
+ if (!isa<BinaryOperator>(IterV))
+ continue;
+ auto *BO = dyn_cast<BinaryOperator>(IterV);
+ if (BO->getOpcode() != Instruction::Add)
+ continue;
+ Value *IncV = nullptr;
+ if (BO->getOperand(0) == PN)
+ IncV = BO->getOperand(1);
+ else if (BO->getOperand(1) == PN)
+ IncV = BO->getOperand(0);
+ if (IncV == nullptr)
+ continue;
+
+ if (auto *T = dyn_cast<ConstantInt>(IncV))
+ if (T->getZExtValue() == 1)
+ return PN;
+ }
+ return nullptr;
+}
+
+
+static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
+ for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
+ if (BB == II->getParent())
+ II->replaceUsesOfWith(I, J);
+ }
+}
+
+
+bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
+ Value *CIV, ParsedValues &PV) {
+ // Match the following:
+ // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
+ // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
+  // The condition may also check for equality with the masked value, i.e.
+ // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
+ // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
+
+ Value *CondV = SelI->getCondition();
+ Value *TrueV = SelI->getTrueValue();
+ Value *FalseV = SelI->getFalseValue();
+
+ using namespace PatternMatch;
+
+ CmpInst::Predicate P;
+ Value *A = nullptr, *B = nullptr, *C = nullptr;
+
+ if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
+ !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
+ return false;
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
+ return false;
+ // Matched: select (A & B) == C ? ... : ...
+ // select (A & B) != C ? ... : ...
+
+ Value *X = nullptr, *Sh1 = nullptr;
+ // Check (A & B) for (X & (1 << i)):
+ if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
+ Sh1 = A;
+ X = B;
+ } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
+ Sh1 = B;
+ X = A;
+ } else {
+    // TODO: Could also check for an induction variable containing a single
+    // bit that is shifted left by 1 in each iteration.
+ return false;
+ }
+
+ bool TrueIfZero;
+
+ // Check C against the possible values for comparison: 0 and (1 << i):
+ if (match(C, m_Zero()))
+ TrueIfZero = (P == CmpInst::ICMP_EQ);
+ else if (C == Sh1)
+ TrueIfZero = (P == CmpInst::ICMP_NE);
+ else
+ return false;
+
+ // So far, matched:
+ // select (X & (1 << i)) ? ... : ...
+ // including variations of the check against zero/non-zero value.
+
+ Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
+ if (TrueIfZero) {
+ ShouldSameV = TrueV;
+ ShouldXoredV = FalseV;
+ } else {
+ ShouldSameV = FalseV;
+ ShouldXoredV = TrueV;
+ }
+
+ Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
+ Value *T = nullptr;
+ if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
+ // Matched: select +++ ? ... : Y ^ Z
+ // select +++ ? Y ^ Z : ...
+ // where +++ denotes previously checked matches.
+ if (ShouldSameV == Y)
+ T = Z;
+ else if (ShouldSameV == Z)
+ T = Y;
+ else
+ return false;
+ R = ShouldSameV;
+ // Matched: select +++ ? R : R ^ T
+ // select +++ ? R ^ T : R
+ // depending on TrueIfZero.
+
+ } else if (match(ShouldSameV, m_Zero())) {
+ // Matched: select +++ ? 0 : ...
+ // select +++ ? ... : 0
+ if (!SelI->hasOneUse())
+ return false;
+ T = ShouldXoredV;
+ // Matched: select +++ ? 0 : T
+ // select +++ ? T : 0
+
+ Value *U = *SelI->user_begin();
+ if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
+ !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
+ return false;
+ // Matched: xor (select +++ ? 0 : T), R
+ // xor (select +++ ? T : 0), R
+ } else
+ return false;
+
+  // The xor input value T is isolated into its own match so that it can
+  // later be checked against an induction variable containing a shifted bit
+  // (TODO). For now, check against (Q << i).
+ if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
+ !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
+ return false;
+ // Matched: select +++ ? R : R ^ (Q << i)
+ // select +++ ? R ^ (Q << i) : R
+
+ PV.X = X;
+ PV.Q = Q;
+ PV.R = R;
+ PV.Left = true;
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
+ ParsedValues &PV) {
+ // Match the following:
+ // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
+ // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
+  // The condition may also check for equality with the masked value, i.e.
+ // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
+ // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
+
+ Value *CondV = SelI->getCondition();
+ Value *TrueV = SelI->getTrueValue();
+ Value *FalseV = SelI->getFalseValue();
+
+ using namespace PatternMatch;
+
+ Value *C = nullptr;
+ CmpInst::Predicate P;
+ bool TrueIfZero;
+
+ if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
+ match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
+ return false;
+ // Matched: select C == 0 ? ... : ...
+ // select C != 0 ? ... : ...
+ TrueIfZero = (P == CmpInst::ICMP_EQ);
+ } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
+ match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
+ return false;
+ // Matched: select C == 1 ? ... : ...
+ // select C != 1 ? ... : ...
+ TrueIfZero = (P == CmpInst::ICMP_NE);
+ } else
+ return false;
+
+ Value *X = nullptr;
+ if (!match(C, m_And(m_Value(X), m_One())) &&
+ !match(C, m_And(m_One(), m_Value(X))))
+ return false;
+ // Matched: select (X & 1) == +++ ? ... : ...
+ // select (X & 1) != +++ ? ... : ...
+
+ Value *R = nullptr, *Q = nullptr;
+ if (TrueIfZero) {
+ // The select's condition is true if the tested bit is 0.
+ // TrueV must be the shift, FalseV must be the xor.
+ if (!match(TrueV, m_LShr(m_Value(R), m_One())))
+ return false;
+ // Matched: select +++ ? (R >> 1) : ...
+ if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
+ !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
+ return false;
+ // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
+ // with commuting ^.
+ } else {
+ // The select's condition is true if the tested bit is 1.
+ // TrueV must be the xor, FalseV must be the shift.
+ if (!match(FalseV, m_LShr(m_Value(R), m_One())))
+ return false;
+ // Matched: select +++ ? ... : (R >> 1)
+ if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
+ !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
+ return false;
+ // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
+ // with commuting ^.
+ }
+
+ PV.X = X;
+ PV.Q = Q;
+ PV.R = R;
+ PV.Left = false;
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
+ BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
+ bool PreScan) {
+ using namespace PatternMatch;
+ // The basic pattern for R = P.Q is:
+ // for i = 0..31
+ // R = phi (0, R')
+ // if (P & (1 << i)) ; test-bit(P, i)
+ // R' = R ^ (Q << i)
+ //
+ // Similarly, the basic pattern for R = (P/Q).Q - P
+ // for i = 0..31
+ // R = phi(P, R')
+ // if (R & (1 << i))
+ // R' = R ^ (Q << i)
+
+  // There exist idioms where, instead of Q being shifted left, P is shifted
+ // right. This produces a result that is shifted right by 32 bits (the
+ // non-shifted result is 64-bit).
+ //
+ // For R = P.Q, this would be:
+ // for i = 0..31
+ // R = phi (0, R')
+ // if ((P >> i) & 1)
+ // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
+ // else ; be shifted by 1, not i.
+ // R' = R >> 1
+ //
+ // And for the inverse:
+ // for i = 0..31
+ // R = phi (P, R')
+ // if (R & 1)
+ // R' = (R >> 1) ^ Q
+ // else
+ // R' = R >> 1
+
+ // The left-shifting idioms share the same pattern:
+ // select (X & (1 << i)) ? R ^ (Q << i) : R
+ // Similarly for right-shifting idioms:
+ // select (X & 1) ? (R >> 1) ^ Q
+
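Concretely, the left-shifting variant in the comment above corresponds to source loops of roughly the following shape: a carry-less (polynomial) multiply over GF(2). This is a sketch of the kind of input the recognizer is after, not code from the pass itself.

    #include <cstdint>

    // R = P . Q over GF(2): test bit i of P and, when set, xor in Q << i.
    uint32_t pmpy32(uint32_t P, uint32_t Q) {
      uint32_t R = 0;
      for (unsigned i = 0; i != 32; ++i)
        if (P & (1u << i))
          R ^= Q << i;
      return R;
    }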
+ if (matchLeftShift(SelI, CIV, PV)) {
+ // If this is a pre-scan, getting this far is sufficient.
+ if (PreScan)
+ return true;
+
+ // Need to make sure that the SelI goes back into R.
+ auto *RPhi = dyn_cast<PHINode>(PV.R);
+ if (!RPhi)
+ return false;
+ if (SelI != RPhi->getIncomingValueForBlock(LoopB))
+ return false;
+ PV.Res = SelI;
+
+ // If X is loop invariant, it must be the input polynomial, and the
+ // idiom is the basic polynomial multiply.
+ if (CurLoop->isLoopInvariant(PV.X)) {
+ PV.P = PV.X;
+ PV.Inv = false;
+ } else {
+ // X is not loop invariant. If X == R, this is the inverse pmpy.
+ // Otherwise, check for an xor with an invariant value. If the
+ // variable argument to the xor is R, then this is still a valid
+ // inverse pmpy.
+ PV.Inv = true;
+ if (PV.X != PV.R) {
+ Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
+ if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
+ return false;
+ auto *I1 = dyn_cast<Instruction>(X1);
+ auto *I2 = dyn_cast<Instruction>(X2);
+ if (!I1 || I1->getParent() != LoopB) {
+ Var = X2;
+ Inv = X1;
+ } else if (!I2 || I2->getParent() != LoopB) {
+ Var = X1;
+ Inv = X2;
+ } else
+ return false;
+ if (Var != PV.R)
+ return false;
+ PV.M = Inv;
+ }
+ // The input polynomial P still needs to be determined. It will be
+ // the entry value of R.
+ Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
+ PV.P = EntryP;
+ }
+
+ return true;
+ }
+
+ if (matchRightShift(SelI, PV)) {
+ // If this is an inverse pattern, the Q polynomial must be known at
+ // compile time.
+ if (PV.Inv && !isa<ConstantInt>(PV.Q))
+ return false;
+ if (PreScan)
+ return true;
+ // There is no exact matching of right-shift pmpy.
+ return false;
+ }
+
+ return false;
+}
+
+
+bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
+ IntegerType *DestTy) {
+ IntegerType *T = dyn_cast<IntegerType>(Val->getType());
+ if (!T || T->getBitWidth() > DestTy->getBitWidth())
+ return false;
+ if (T->getBitWidth() == DestTy->getBitWidth())
+ return true;
+ // Non-instructions are promotable. The reason why an instruction may not
+ // be promotable is that it may produce a different result if its operands
+ // and the result are promoted, for example, it may produce more non-zero
+ // bits. While it would still be possible to represent the proper result
+ // in a wider type, it may require adding additional instructions (which
+ // we don't want to do).
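+ //
+ // For example, an i8 'add 255, 1' wraps to 0, but after zero-extending the
+ // operands to i32 the same add produces 256, so 'add' is only accepted
+ // here when it provably cannot wrap (see the nsw/nuw check below).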
+ Instruction *In = dyn_cast<Instruction>(Val);
+ if (!In)
+ return true;
+ // The bitwidth of the source type is smaller than the destination.
+ // Check if the individual operation can be promoted.
+ switch (In->getOpcode()) {
+ case Instruction::PHI:
+ case Instruction::ZExt:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::LShr: // Shift right is ok.
+ case Instruction::Select:
+ return true;
+ case Instruction::ICmp:
+ if (CmpInst *CI = cast<CmpInst>(In))
+ return CI->isEquality() || CI->isUnsigned();
+ llvm_unreachable("Cast failed unexpectedly");
+ case Instruction::Add:
+ return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
+ }
+ return false;
+}
+
+
+void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
+ IntegerType *DestTy, BasicBlock *LoopB) {
+ // Leave boolean values alone.
+ if (!In->getType()->isIntegerTy(1))
+ In->mutateType(DestTy);
+ unsigned DestBW = DestTy->getBitWidth();
+
+ // Handle PHIs.
+ if (PHINode *P = dyn_cast<PHINode>(In)) {
+ unsigned N = P->getNumIncomingValues();
+ for (unsigned i = 0; i != N; ++i) {
+ BasicBlock *InB = P->getIncomingBlock(i);
+ if (InB == LoopB)
+ continue;
+ Value *InV = P->getIncomingValue(i);
+ IntegerType *Ty = cast<IntegerType>(InV->getType());
+ // Do not promote values in PHI nodes of type i1.
+ if (Ty != P->getType()) {
+ // If the value type does not match the PHI type, the PHI type
+ // must have been promoted.
+ assert(Ty->getBitWidth() < DestBW);
+ InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
+ P->setIncomingValue(i, InV);
+ }
+ }
+ } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
+ Value *Op = Z->getOperand(0);
+ if (Op->getType() == Z->getType())
+ Z->replaceAllUsesWith(Op);
+ Z->eraseFromParent();
+ return;
+ }
+
+ // Promote immediates.
+ for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
+ if (CI->getType()->getBitWidth() < DestBW)
+ In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
+ }
+}
+
+
+bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
+ BasicBlock *ExitB) {
+ assert(LoopB);
+ // Skip loops where the exit block has more than one predecessor. The values
+ // coming from the loop block will be promoted to another type, and so the
+ // values coming into the exit block from other predecessors would also have
+ // to be promoted.
+ if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
+ return false;
+ IntegerType *DestTy = getPmpyType();
+ // Check if the exit values have types that are no wider than the type
+ // that we want to promote to.
+ unsigned DestBW = DestTy->getBitWidth();
+ for (Instruction &In : *ExitB) {
+ PHINode *P = dyn_cast<PHINode>(&In);
+ if (!P)
+ break;
+ if (P->getNumIncomingValues() != 1)
+ return false;
+ assert(P->getIncomingBlock(0) == LoopB);
+ IntegerType *T = dyn_cast<IntegerType>(P->getType());
+ if (!T || T->getBitWidth() > DestBW)
+ return false;
+ }
+
+ // Check all instructions in the loop.
+ for (Instruction &In : *LoopB)
+ if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
+ return false;
+
+ // Perform the promotion.
+ std::vector<Instruction*> LoopIns;
+ std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
+ [](Instruction &In) { return &In; });
+ for (Instruction *In : LoopIns)
+ promoteTo(In, DestTy, LoopB);
+
+ // Fix up the PHI nodes in the exit block.
+ Instruction *EndI = ExitB->getFirstNonPHI();
+ BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
+ for (auto I = ExitB->begin(); I != End; ++I) {
+ PHINode *P = dyn_cast<PHINode>(I);
+ if (!P)
+ break;
+ Type *Ty0 = P->getIncomingValue(0)->getType();
+ Type *PTy = P->getType();
+ if (PTy != Ty0) {
+ assert(Ty0 == DestTy);
+ // In order to create the trunc, P must have the promoted type.
+ P->mutateType(Ty0);
+ Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
+ // In order for the RAUW to work, the types of P and T must match.
+ P->mutateType(PTy);
+ P->replaceAllUsesWith(T);
+ // Final update of the P's type.
+ P->mutateType(Ty0);
+ cast<Instruction>(T)->setOperand(0, P);
+ }
+ }
+
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
+ ValueSeq &Cycle) {
+ // Out = ..., In, ...
+ if (Out == In)
+ return true;
+
+ auto *BB = cast<Instruction>(Out)->getParent();
+ bool HadPhi = false;
+
+ for (auto U : Out->users()) {
+ auto *I = dyn_cast<Instruction>(&*U);
+ if (I == nullptr || I->getParent() != BB)
+ continue;
+ // Make sure that there are no multi-iteration cycles, e.g.
+ // p1 = phi(p2)
+ // p2 = phi(p1)
+ // The cycle p1->p2->p1 would span two loop iterations.
+ // Check that there is only one phi in the cycle.
+ bool IsPhi = isa<PHINode>(I);
+ if (IsPhi && HadPhi)
+ return false;
+ HadPhi |= IsPhi;
+ if (Cycle.count(I))
+ return false;
+ Cycle.insert(I);
+ if (findCycle(I, In, Cycle))
+ break;
+ Cycle.remove(I);
+ }
+ return !Cycle.empty();
+}
+
+
+void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
+ ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
+ // All the values in the cycle that are between the phi node and the
+ // divider instruction will be classified as "early", all other values
+ // will be "late".
+
+ bool IsE = true;
+ unsigned I, N = Cycle.size();
+ for (I = 0; I < N; ++I) {
+ Value *V = Cycle[I];
+ if (DivI == V)
+ IsE = false;
+ else if (!isa<PHINode>(V))
+ continue;
+ // Stop if found either.
+ break;
+ }
+ // "I" is the index of either DivI or the phi node, whichever was first.
+ // "E" is "false" or "true" respectively.
+ ValueSeq &First = !IsE ? Early : Late;
+ for (unsigned J = 0; J < I; ++J)
+ First.insert(Cycle[J]);
+
+ ValueSeq &Second = IsE ? Early : Late;
+ Second.insert(Cycle[I]);
+ for (++I; I < N; ++I) {
+ Value *V = Cycle[I];
+ if (DivI == V || isa<PHINode>(V))
+ break;
+ Second.insert(V);
+ }
+
+ for (; I < N; ++I)
+ First.insert(Cycle[I]);
+}
+
+
+bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
+ ValueSeq &Early, ValueSeq &Late) {
+ // Select is an exception, since the condition value does not have to be
+ // classified in the same way as the true/false values. The true/false
+ // values do have to be both early or both late.
+ if (UseI->getOpcode() == Instruction::Select) {
+ Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
+ if (Early.count(TV) || Early.count(FV)) {
+ if (Late.count(TV) || Late.count(FV))
+ return false;
+ Early.insert(UseI);
+ } else if (Late.count(TV) || Late.count(FV)) {
+ if (Early.count(TV) || Early.count(FV))
+ return false;
+ Late.insert(UseI);
+ }
+ return true;
+ }
+
+ // Not sure what an example of this would be, but the code below relies
+ // on having at least one operand.
+ if (UseI->getNumOperands() == 0)
+ return true;
+
+ bool AE = true, AL = true;
+ for (auto &I : UseI->operands()) {
+ if (Early.count(&*I))
+ AL = false;
+ else if (Late.count(&*I))
+ AE = false;
+ }
+ // If the operands appear "all early" and "all late" at the same time,
+ // then it means that none of them are actually classified as either.
+ // This is harmless.
+ if (AE && AL)
+ return true;
+ // Conversely, if they are neither "all early" nor "all late", then
+ // we have a mixture of early and late operands that is not a known
+ // exception.
+ if (!AE && !AL)
+ return false;
+
+ // Check that we have covered the two special cases.
+ assert(AE != AL);
+
+ if (AE)
+ Early.insert(UseI);
+ else
+ Late.insert(UseI);
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::Select:
+ case Instruction::ICmp:
+ case Instruction::PHI:
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
+ unsigned IterCount) {
+ auto *T = dyn_cast<IntegerType>(V->getType());
+ if (!T)
+ return false;
+
+ unsigned BW = T->getBitWidth();
+ APInt K0(BW, 0), K1(BW, 0);
+ computeKnownBits(V, K0, K1, DL);
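+ // K0 holds the bits known to be zero, so the top IterCount bits of V are
+ // all known to be zero exactly when K0 has at least IterCount leading ones.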
+ return K0.countLeadingOnes() >= IterCount;
+}
+
+
+bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
+ unsigned IterCount) {
+ // Assume that all inputs to the value have the high bits zero.
+ // Check if the value itself preserves the zeros in the high bits.
+ if (auto *C = dyn_cast<ConstantInt>(V))
+ return C->getValue().countLeadingZeros() >= IterCount;
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ switch (I->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::LShr:
+ case Instruction::Select:
+ case Instruction::ICmp:
+ case Instruction::PHI:
+ case Instruction::ZExt:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
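+ // An operand is considered "shifted" if it carries data that travels
+ // through the shift cycle; the shift-amount operand of shl/lshr does not
+ // (e.g. in "x >> 1" the value x is shifted data, the amount 1 is not).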
+ unsigned Opc = I->getOpcode();
+ if (Opc == Instruction::Shl || Opc == Instruction::LShr)
+ return Op != I->getOperand(1);
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
+ BasicBlock *ExitB, unsigned IterCount) {
+ Value *CIV = getCountIV(LoopB);
+ if (CIV == nullptr)
+ return false;
+ auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
+ if (CIVTy == nullptr)
+ return false;
+
+ ValueSeq RShifts;
+ ValueSeq Early, Late, Cycled;
+
+ // Find all value cycles that contain logical right shifts by 1.
+ for (Instruction &I : *LoopB) {
+ using namespace PatternMatch;
+ Value *V = nullptr;
+ if (!match(&I, m_LShr(m_Value(V), m_One())))
+ continue;
+ ValueSeq C;
+ if (!findCycle(&I, V, C))
+ continue;
+
+ // Found a cycle.
+ C.insert(&I);
+ classifyCycle(&I, C, Early, Late);
+ Cycled.insert(C.begin(), C.end());
+ RShifts.insert(&I);
+ }
+
+ // Find the set of all values affected by the shift cycles, i.e. all
+ // cycled values, and (recursively) all their users.
+ ValueSeq Users(Cycled.begin(), Cycled.end());
+ for (unsigned i = 0; i < Users.size(); ++i) {
+ Value *V = Users[i];
+ if (!isa<IntegerType>(V->getType()))
+ return false;
+ auto *R = cast<Instruction>(V);
+ // If the instruction does not commute with shifts, the loop cannot
+ // be unshifted.
+ if (!commutesWithShift(R))
+ return false;
+ for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
+ auto *T = cast<Instruction>(*I);
+ // Skip users from outside of the loop. They will be handled later.
+ // Also, skip the right-shifts and phi nodes, since they mix early
+ // and late values.
+ if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
+ continue;
+
+ Users.insert(T);
+ if (!classifyInst(T, Early, Late))
+ return false;
+ }
+ }
+
+ if (Users.size() == 0)
+ return false;
+
+ // Verify that high bits remain zero.
+ ValueSeq Internal(Users.begin(), Users.end());
+ ValueSeq Inputs;
+ for (unsigned i = 0; i < Internal.size(); ++i) {
+ auto *R = dyn_cast<Instruction>(Internal[i]);
+ if (!R)
+ continue;
+ for (Value *Op : R->operands()) {
+ auto *T = dyn_cast<Instruction>(Op);
+ if (T && T->getParent() != LoopB)
+ Inputs.insert(Op);
+ else
+ Internal.insert(Op);
+ }
+ }
+ for (Value *V : Inputs)
+ if (!highBitsAreZero(V, IterCount))
+ return false;
+ for (Value *V : Internal)
+ if (!keepsHighBitsZero(V, IterCount))
+ return false;
+
+ // Finally, the work can be done. Unshift each user.
+ IRBuilder<> IRB(LoopB);
+ std::map<Value*,Value*> ShiftMap;
+ typedef std::map<std::pair<Value*,Type*>,Value*> CastMapType;
+ CastMapType CastMap;
+
+ auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
+ IntegerType *Ty) -> Value* {
+ auto H = CM.find(std::make_pair(V, Ty));
+ if (H != CM.end())
+ return H->second;
+ Value *CV = IRB.CreateIntCast(V, Ty, false);
+ CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
+ return CV;
+ };
+
+ for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
+ if (isa<PHINode>(I) || !Users.count(&*I))
+ continue;
+ using namespace PatternMatch;
+ // Match lshr x, 1.
+ Value *V = nullptr;
+ if (match(&*I, m_LShr(m_Value(V), m_One()))) {
+ replaceAllUsesOfWithIn(&*I, V, LoopB);
+ continue;
+ }
+ // For each non-cycled operand, replace it with the corresponding
+ // value shifted left.
+ for (auto &J : I->operands()) {
+ Value *Op = J.get();
+ if (!isOperandShifted(&*I, Op))
+ continue;
+ if (Users.count(Op))
+ continue;
+ // Skip shifting zeros.
+ if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
+ continue;
+ // Check if we have already generated a shift for this value.
+ auto F = ShiftMap.find(Op);
+ Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
+ if (W == nullptr) {
+ IRB.SetInsertPoint(&*I);
+ // First, the shift amount will be CIV or CIV+1, depending on
+ // whether the value is early or late. Instead of creating CIV+1,
+ // do a single shift of the value.
+ Value *ShAmt = CIV, *ShVal = Op;
+ auto *VTy = cast<IntegerType>(ShVal->getType());
+ auto *ATy = cast<IntegerType>(ShAmt->getType());
+ if (Late.count(&*I))
+ ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
+ // Second, the types of the shifted value and the shift amount
+ // must match.
+ if (VTy != ATy) {
+ if (VTy->getBitWidth() < ATy->getBitWidth())
+ ShVal = upcast(CastMap, IRB, ShVal, ATy);
+ else
+ ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
+ }
+ // Ready to generate the shift and memoize it.
+ W = IRB.CreateShl(ShVal, ShAmt);
+ ShiftMap.insert(std::make_pair(Op, W));
+ }
+ I->replaceUsesOfWith(Op, W);
+ }
+ }
+
+ // Update the users outside of the loop to account for having left
+ // shifts. They would normally be shifted right in the loop, so shift
+ // them right after the loop exit.
+ // Take advantage of the loop-closed SSA form, which has all the post-
+ // loop values in phi nodes.
+ IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
+ for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
+ if (!isa<PHINode>(P))
+ break;
+ auto *PN = cast<PHINode>(P);
+ Value *U = PN->getIncomingValueForBlock(LoopB);
+ if (!Users.count(U))
+ continue;
+ Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
+ PN->replaceAllUsesWith(S);
+ // The above RAUW will create
+ // S = lshr S, IterCount
+ // so we need to fix it back into
+ // S = lshr PN, IterCount
+ cast<User>(S)->replaceUsesOfWith(S, PN);
+ }
+
+ return true;
+}
+
+
+void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
+ for (auto &I : *LoopB)
+ if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT))
+ I.replaceAllUsesWith(SV);
+
+ for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
+ N = std::next(I);
+ RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
+ }
+}
+
+
+unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
+ // Arrays of coefficients of Q and the inverse, C.
+ // Q[i] = coefficient at x^i.
+ std::array<char,32> Q, C;
+
+ for (unsigned i = 0; i < 32; ++i) {
+ Q[i] = QP & 1;
+ QP >>= 1;
+ }
+ assert(Q[0] == 1);
+
+ // Find C, such that
+ // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
+ //
+ // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
+ // operations * and + are & and ^ respectively.
+ //
+ // Find C[i] recursively, by comparing i-th coefficient in the product
+ // with 0 (or 1 for i=0).
+ //
+ // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
+ C[0] = 1;
+ for (unsigned i = 1; i < 32; ++i) {
+ // Solve for C[i] in:
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
+ // This is equivalent to
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
+ // which is
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
+ unsigned T = 0;
+ for (unsigned j = 0; j < i; ++j)
+ T = T ^ (C[j] & Q[i-j]);
+ C[i] = T;
+ }
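+
+ // A hand-checkable example: for QP = 0b11, i.e. Q(x) = x + 1, the
+ // recurrence above gives C[i] = 1 for every i, so the returned inverse is
+ // 0xFFFFFFFF. Indeed, (x + 1)*(x^31 + ... + x + 1) = x^32 + 1 in Z2[x],
+ // and keeping only the low 32 coefficients leaves 1.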
+
+ unsigned QV = 0;
+ for (unsigned i = 0; i < 32; ++i)
+ if (C[i])
+ QV |= (1 << i);
+
+ return QV;
+}
+
+
+Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
+ ParsedValues &PV) {
+ IRBuilder<> B(&*At);
+ Module *M = At->getParent()->getParent()->getParent();
+ Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
+
+ Value *P = PV.P, *Q = PV.Q, *P0 = P;
+ unsigned IC = PV.IterCount;
+
+ if (PV.M != nullptr)
+ P0 = P = B.CreateXor(P, PV.M);
+
+ // Create a bit mask to clear the high bits beyond IterCount.
+ auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
+
+ if (PV.IterCount != 32)
+ P = B.CreateAnd(P, BMI);
+
+ if (PV.Inv) {
+ auto *QI = dyn_cast<ConstantInt>(PV.Q);
+ assert(QI && QI->getBitWidth() <= 32);
+
+ // Again, clearing bits beyond IterCount.
+ unsigned M = (1 << PV.IterCount) - 1;
+ unsigned Tmp = (QI->getZExtValue() | 1) & M;
+ unsigned QV = getInverseMxN(Tmp) & M;
+ auto *QVI = ConstantInt::get(QI->getType(), QV);
+ P = B.CreateCall(PMF, {P, QVI});
+ P = B.CreateTrunc(P, QI->getType());
+ if (IC != 32)
+ P = B.CreateAnd(P, BMI);
+ }
+
+ Value *R = B.CreateCall(PMF, {P, Q});
+
+ if (PV.M != nullptr)
+ R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
+
+ return R;
+}
+
+
+void PolynomialMultiplyRecognize::setupSimplifier() {
+ Simp.addRule(
+ // Sink zext past bitwise operations.
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::ZExt)
+ return nullptr;
+ Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
+ if (!T)
+ return nullptr;
+ switch (T->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ default:
+ return nullptr;
+ }
+ IRBuilder<> B(Ctx);
+ return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
+ B.CreateZExt(T->getOperand(0), I->getType()),
+ B.CreateZExt(T->getOperand(1), I->getType()));
+ });
+ Simp.addRule(
+ // (xor (and x a) (and y a)) -> (and (xor x y) a)
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::Xor)
+ return nullptr;
+ Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
+ if (!And0 || !And1)
+ return nullptr;
+ if (And0->getOpcode() != Instruction::And ||
+ And1->getOpcode() != Instruction::And)
+ return nullptr;
+ if (And0->getOperand(1) != And1->getOperand(1))
+ return nullptr;
+ IRBuilder<> B(Ctx);
+ return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
+ And0->getOperand(1));
+ });
+ Simp.addRule(
+ // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
+ // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
+ if (!BO)
+ return nullptr;
+ Instruction::BinaryOps Op = BO->getOpcode();
+ if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
+ IRBuilder<> B(Ctx);
+ Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
+ Value *Z = BO->getOperand(1);
+ return B.CreateSelect(Sel->getCondition(),
+ B.CreateBinOp(Op, X, Z),
+ B.CreateBinOp(Op, Y, Z));
+ }
+ if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
+ IRBuilder<> B(Ctx);
+ Value *X = BO->getOperand(0);
+ Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
+ return B.CreateSelect(Sel->getCondition(),
+ B.CreateBinOp(Op, X, Y),
+ B.CreateBinOp(Op, X, Z));
+ }
+ return nullptr;
+ });
+ Simp.addRule(
+ // (select c (select c x y) z) -> (select c x z)
+ // (select c x (select c y z)) -> (select c x z)
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ SelectInst *Sel = dyn_cast<SelectInst>(I);
+ if (!Sel)
+ return nullptr;
+ IRBuilder<> B(Ctx);
+ Value *C = Sel->getCondition();
+ if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
+ if (Sel0->getCondition() == C)
+ return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
+ }
+ if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
+ if (Sel1->getCondition() == C)
+ return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
+ }
+ return nullptr;
+ });
+ Simp.addRule(
+ // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::Or)
+ return nullptr;
+ Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0));
+ if (!LShr || LShr->getOpcode() != Instruction::LShr)
+ return nullptr;
+ ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1));
+ if (!One || One->getZExtValue() != 1)
+ return nullptr;
+ ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
+ return nullptr;
+ return IRBuilder<>(Ctx).CreateXor(LShr, Msb);
+ });
+ Simp.addRule(
+ // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::LShr)
+ return nullptr;
+ BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
+ if (!BitOp)
+ return nullptr;
+ switch (BitOp->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ default:
+ return nullptr;
+ }
+ IRBuilder<> B(Ctx);
+ Value *S = I->getOperand(1);
+ return B.CreateBinOp(BitOp->getOpcode(),
+ B.CreateLShr(BitOp->getOperand(0), S),
+ B.CreateLShr(BitOp->getOperand(1), S));
+ });
+ Simp.addRule(
+ // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ auto IsBitOp = [](unsigned Op) -> bool {
+ switch (Op) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+ return false;
+ };
+ BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
+ if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
+ return nullptr;
+ BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
+ if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
+ return nullptr;
+ ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
+ ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
+ if (!CA || !CB)
+ return nullptr;
+ IRBuilder<> B(Ctx);
+ Value *X = BitOp2->getOperand(0);
+ return B.CreateBinOp(BitOp2->getOpcode(), X,
+ B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
+ });
+}
+
+
+bool PolynomialMultiplyRecognize::recognize() {
+ DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
+ << *CurLoop << '\n');
+ // Restrictions:
+ // - The loop must consist of a single block.
+ // - The iteration count must be known at compile-time.
+ // - The loop must have an induction variable starting from 0, and
+ // incremented in each iteration of the loop.
+ BasicBlock *LoopB = CurLoop->getHeader();
+ DEBUG(dbgs() << "Loop header:\n" << *LoopB);
+
+ if (LoopB != CurLoop->getLoopLatch())
+ return false;
+ BasicBlock *ExitB = CurLoop->getExitBlock();
+ if (ExitB == nullptr)
+ return false;
+ BasicBlock *EntryB = CurLoop->getLoopPreheader();
+ if (EntryB == nullptr)
+ return false;
+
+ unsigned IterCount = 0;
+ const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
+ if (isa<SCEVCouldNotCompute>(CT))
+ return false;
+ if (auto *CV = dyn_cast<SCEVConstant>(CT))
+ IterCount = CV->getValue()->getZExtValue() + 1;
+
+ Value *CIV = getCountIV(LoopB);
+ ParsedValues PV;
+ PV.IterCount = IterCount;
+ DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n');
+
+ setupSimplifier();
+
+ // Perform a preliminary scan of select instructions to see if any of them
+ // looks like a generator of the polynomial multiply steps. Assume that a
+ // loop can only contain a single transformable operation, so stop the
+ // traversal after the first reasonable candidate is found.
+ // XXX: Currently this approach can modify the loop before being 100% sure
+ // that the transformation can be carried out.
+ bool FoundPreScan = false;
+ for (Instruction &In : *LoopB) {
+ SelectInst *SI = dyn_cast<SelectInst>(&In);
+ if (!SI)
+ continue;
+
+ Simplifier::Context C(SI);
+ Value *T = Simp.simplify(C);
+ SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
+ DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n');
+ if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
+ FoundPreScan = true;
+ if (SelI != SI) {
+ Value *NewSel = C.materialize(LoopB, SI->getIterator());
+ SI->replaceAllUsesWith(NewSel);
+ RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
+ }
+ break;
+ }
+ }
+
+ if (!FoundPreScan) {
+ DEBUG(dbgs() << "Have not found candidates for pmpy\n");
+ return false;
+ }
+
+ if (!PV.Left) {
+ // The right shift version actually only returns the higher bits of
+ // the result (each iteration discards the LSB). If we want to convert it
+ // to a left-shifting loop, the working data type must be at least as
+ // wide as the target's pmpy instruction.
+ if (!promoteTypes(LoopB, ExitB))
+ return false;
+ convertShiftsToLeft(LoopB, ExitB, IterCount);
+ cleanupLoopBody(LoopB);
+ }
+
+ // Scan the loop again, find the generating select instruction.
+ bool FoundScan = false;
+ for (Instruction &In : *LoopB) {
+ SelectInst *SelI = dyn_cast<SelectInst>(&In);
+ if (!SelI)
+ continue;
+ DEBUG(dbgs() << "scanSelect: " << *SelI << '\n');
+ FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
+ if (FoundScan)
+ break;
+ }
+ assert(FoundScan);
+
+ DEBUG({
+ StringRef PP = (PV.M ? "(P+M)" : "P");
+ if (!PV.Inv)
+ dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";
+ else
+ dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "
+ << PP << "\n";
+ dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";
+ if (PV.M)
+ dbgs() << " M:" << *PV.M << "\n";
+ dbgs() << " Q:" << *PV.Q << "\n";
+ dbgs() << " Iteration count:" << PV.IterCount << "\n";
+ });
+
+ BasicBlock::iterator At(EntryB->getTerminator());
+ Value *PM = generate(At, PV);
+ if (PM == nullptr)
+ return false;
+
+ if (PM->getType() != PV.Res->getType())
+ PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
+
+ PV.Res->replaceAllUsesWith(PM);
+ PV.Res->eraseFromParent();
+ return true;
+}
+
+
+unsigned HexagonLoopIdiomRecognize::getStoreSizeInBytes(StoreInst *SI) {
+ uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
+ assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
+ "Don't overflow unsigned.");
+ return (unsigned)SizeInBits >> 3;
+}
+
+
+int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
+ return SC->getAPInt().getSExtValue();
+ return 0;
+}
+
+
+bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
+ // Allow volatile stores if HexagonVolatileMemcpy is enabled.
+ if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
+ return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ int Stride = getSCEVStride(StoreEv);
+ if (Stride == 0)
+ return false;
+ unsigned StoreSize = getStoreSizeInBytes(SI);
+ if (StoreSize != unsigned(std::abs(Stride)))
+ return false;
+
+ // The store must be feeding a non-volatile load.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || !LI->isSimple())
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ Value *LoadPtr = LI->getPointerOperand();
+ auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ // The store and load must share the same stride.
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+ return false;
+
+ // Success. This store can be converted into a memcpy.
+ return true;
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool
+mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
+ SmallPtrSetImpl<Instruction *> &Ignored) {
+ // Get the location that may be stored across the loop. Since the access
+ // is strided positively through memory, we say that the modified location
+ // starts at the pointer and has infinite size.
+ uint64_t AccessSize = MemoryLocation::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access
+ // size to be exactly (BECount+1)*StoreSize, the size of the copied region.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. A store to &A[i] over 100 iterations will always be
+ // reported as may-aliasing a store to &A[100]; we need StoreLoc to be "A"
+ // with a size of 100, which will then no-alias a store to &A[100].
+ MemoryLocation StoreLoc(Ptr, AccessSize);
+
+ for (auto *B : L->blocks())
+ for (auto &I : *B)
+ if (Ignored.count(&I) == 0 && (AA.getModRefInfo(&I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
+
+
+void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
+ SmallVectorImpl<StoreInst*> &Stores) {
+ Stores.clear();
+ for (Instruction &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ if (isLegalStore(CurLoop, SI))
+ Stores.push_back(SI);
+}
+
+
+bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
+ StoreInst *SI, const SCEV *BECount) {
+ assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&
+ "Expected only non-volatile stores, or Hexagon-specific memcpy"
+ "to volatile destination.");
+
+ Value *StorePtr = SI->getPointerOperand();
+ auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getSCEVStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI);
+ if (Stride != StoreSize)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+
+ // The trip count of the loop and the base pointer of the addrec SCEV are
+ // guaranteed to be loop invariant, which means that they should dominate
+ // the header. This allows us to insert code for them in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ Instruction *ExpPt = Preheader->getTerminator();
+ IRBuilder<> Builder(ExpPt);
+ SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
+
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy/memmove in the loop preheader now if we want. However,
+ // this would be unsafe to do if there is anything else in the loop that may
+ // read or write the memory region we're storing to. For memcpy, this
+ // includes the load that feeds the stores. Check for an alias by generating
+ // the base address and checking everything.
+ Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
+ Value *LoadBasePtr = nullptr;
+
+ bool Overlap = false;
+ bool DestVolatile = SI->isVolatile();
+ Type *BECountTy = BECount->getType();
+
+ if (DestVolatile) {
+ // The trip count must fit in i32, since it is the type of the "num_words"
+ // argument to hexagon_memcpy_forward_vp4cp4n2.
+ if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
+CleanupAndExit:
+ // If we generated new code for the base pointer, clean up.
+ Expander.clear();
+ if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
+ RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+ StoreBasePtr = nullptr;
+ }
+ if (LoadBasePtr) {
+ RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
+ LoadBasePtr = nullptr;
+ }
+ return false;
+ }
+ }
+
+ SmallPtrSet<Instruction*, 2> Ignore1;
+ Ignore1.insert(SI);
+ if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ StoreSize, *AA, Ignore1)) {
+ // Check if the load is the offending instruction.
+ Ignore1.insert(LI);
+ if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ StoreSize, *AA, Ignore1)) {
+ // Still bad. Nothing we can do.
+ goto CleanupAndExit;
+ }
+ // It worked with the load ignored.
+ Overlap = true;
+ }
+
+ if (!Overlap) {
+ if (DisableMemcpyIdiom || !HasMemcpy)
+ goto CleanupAndExit;
+ } else {
+ // Don't generate memmove if this function will be inlined. This is
+ // because the caller will undergo this transformation after inlining.
+ Function *Func = CurLoop->getHeader()->getParent();
+ if (Func->hasFnAttribute(Attribute::AlwaysInline))
+ goto CleanupAndExit;
+
+ // In the case of a memmove, the call to memmove will be executed instead
+ // of the loop, so we need to make sure that there is nothing else in
+ // the loop other than the load, the store, and the instructions that
+ // these two depend on.
+ SmallVector<Instruction*,2> Insts;
+ Insts.push_back(SI);
+ Insts.push_back(LI);
+ if (!coverLoop(CurLoop, Insts))
+ goto CleanupAndExit;
+
+ if (DisableMemmoveIdiom || !HasMemmove)
+ goto CleanupAndExit;
+ bool IsNested = CurLoop->getParentLoop() != 0;
+ if (IsNested && OnlyNonNestedMemmove)
+ goto CleanupAndExit;
+ }
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
+
+ SmallPtrSet<Instruction*, 2> Ignore2;
+ Ignore2.insert(SI);
+ if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
+ *AA, Ignore2))
+ goto CleanupAndExit;
+
+ // Check the stride.
+ bool StridePos = getSCEVStride(LoadEv) >= 0;
+
+ // Currently, the volatile memcpy only emulates traversing memory forward.
+ if (!StridePos && DestVolatile)
+ goto CleanupAndExit;
+
+ bool RuntimeCheck = (Overlap || DestVolatile);
+
+ BasicBlock *ExitB;
+ if (RuntimeCheck) {
+ // The runtime check needs a single exit block.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ goto CleanupAndExit;
+ ExitB = ExitBlocks[0];
+ }
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ LLVMContext &Ctx = SI->getContext();
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
+ unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());
+ DebugLoc DLoc = SI->getDebugLoc();
+
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
+ SCEV::FlagNUW);
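+ // For example, a loop that copies 100 i32 elements has BECount = 99 and
+ // StoreSize = 4, so NumBytesS evaluates to (99 + 1) * 4 = 400 bytes.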
+ Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
+ if (Instruction *In = dyn_cast<Instruction>(NumBytes))
+ if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
+ NumBytes = Simp;
+
+ CallInst *NewCall;
+
+ if (RuntimeCheck) {
+ unsigned Threshold = RuntimeMemSizeThreshold;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
+ uint64_t C = CI->getZExtValue();
+ if (Threshold != 0 && C < Threshold)
+ goto CleanupAndExit;
+ if (C < CompileTimeMemSizeThreshold)
+ goto CleanupAndExit;
+ }
+
+ BasicBlock *Header = CurLoop->getHeader();
+ Function *Func = Header->getParent();
+ Loop *ParentL = LF->getLoopFor(Preheader);
+ StringRef HeaderName = Header->getName();
+
+ // Create a new (empty) preheader, and update the PHI nodes in the
+ // header to use the new preheader.
+ BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
+ Func, Header);
+ if (ParentL)
+ ParentL->addBasicBlockToLoop(NewPreheader, *LF);
+ IRBuilder<>(NewPreheader).CreateBr(Header);
+ for (auto &In : *Header) {
+ PHINode *PN = dyn_cast<PHINode>(&In);
+ if (!PN)
+ break;
+ int bx = PN->getBasicBlockIndex(Preheader);
+ if (bx >= 0)
+ PN->setIncomingBlock(bx, NewPreheader);
+ }
+ DT->addNewBlock(NewPreheader, Preheader);
+ DT->changeImmediateDominator(Header, NewPreheader);
+
+ // Check for safe conditions to execute memmove.
+ // If stride is positive, copying things from higher to lower addresses
+ // is equivalent to memmove. For negative stride, it's the other way
+ // around. Copying forward in memory with positive stride may not be
+ // same as memmove since we may be copying values that we just stored
+ // in some previous iteration.
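+ // Putting it together, the guard built below takes the memmove path when
+ // either the copy direction is memmove-safe (LowA < HighA) or the two
+ // regions are more than NumBytes bytes apart, optionally also requiring
+ // NumBytes to exceed the runtime threshold; otherwise control falls
+ // through to the original loop.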
+ Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
+ Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
+ Value *LowA = StridePos ? SA : LA;
+ Value *HighA = StridePos ? LA : SA;
+ Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
+ Value *Cond = CmpA;
+
+ // Check for distance between pointers.
+ Value *Dist = Builder.CreateSub(HighA, LowA);
+ Value *CmpD = Builder.CreateICmpSLT(NumBytes, Dist);
+ Value *CmpEither = Builder.CreateOr(Cond, CmpD);
+ Cond = CmpEither;
+
+ if (Threshold != 0) {
+ Type *Ty = NumBytes->getType();
+ Value *Thr = ConstantInt::get(Ty, Threshold);
+ Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
+ Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
+ Cond = CmpBoth;
+ }
+ BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
+ Func, NewPreheader);
+ if (ParentL)
+ ParentL->addBasicBlockToLoop(MemmoveB, *LF);
+ Instruction *OldT = Preheader->getTerminator();
+ Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
+ OldT->eraseFromParent();
+ Preheader->setName(Preheader->getName()+".old");
+ DT->addNewBlock(MemmoveB, Preheader);
+ // Find the new immediate dominator of the exit block.
+ BasicBlock *ExitD = Preheader;
+ for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
+ BasicBlock *PB = *PI;
+ ExitD = DT->findNearestCommonDominator(ExitD, PB);
+ if (!ExitD)
+ break;
+ }
+ // If the prior immediate dominator of ExitB was dominated by the
+ // old preheader, then the old preheader becomes the new immediate
+ // dominator. Otherwise don't change anything (because the newly
+ // added blocks are dominated by the old preheader).
+ if (ExitD && DT->dominates(Preheader, ExitD)) {
+ DomTreeNode *BN = DT->getNode(ExitB);
+ DomTreeNode *DN = DT->getNode(ExitD);
+ BN->setIDom(DN);
+ }
+
+ // Add a call to memmove to the conditional block.
+ IRBuilder<> CondBuilder(MemmoveB);
+ CondBuilder.CreateBr(ExitB);
+ CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
+
+ if (DestVolatile) {
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+ Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
+ Type *VoidTy = Type::getVoidTy(Ctx);
+ Module *M = Func->getParent();
+ Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
+ Int32PtrTy, Int32PtrTy, Int32Ty);
+ Function *Fn = cast<Function>(CF);
+ Fn->setLinkage(Function::ExternalLinkage);
+
+ const SCEV *OneS = SE->getConstant(Int32Ty, 1);
+ const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
+ const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
+ Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
+ MemmoveB->getTerminator());
+ if (Instruction *In = dyn_cast<Instruction>(NumWords))
+ if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
+ NumWords = Simp;
+
+ Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
+ ? StoreBasePtr
+ : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
+ Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
+ ? LoadBasePtr
+ : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
+ NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
+ } else {
+ NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr,
+ NumBytes, Alignment);
+ }
+ } else {
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr,
+ NumBytes, Alignment);
+ // Okay, the memcpy has been formed. Zap the original store and
+ // anything that feeds into it.
+ RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
+ }
+
+ NewCall->setDebugLoc(DLoc);
+
+ DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")
+ << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+
+ return true;
+}
+
+
+// \brief Check if the instructions in Insts, together with their dependencies,
+// cover the loop in the sense that the loop could be safely eliminated once
+// the instructions in Insts are removed.
+bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
+ SmallVectorImpl<Instruction*> &Insts) const {
+ SmallSet<BasicBlock*,8> LoopBlocks;
+ for (auto *B : L->blocks())
+ LoopBlocks.insert(B);
+
+ SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
+
+ // Collect all instructions from the loop that the instructions in Insts
+ // depend on (plus their dependencies, etc.). These instructions will
+ // constitute the expression trees that feed those in Insts, but the trees
+ // will be limited only to instructions contained in the loop.
+ for (unsigned i = 0; i < Worklist.size(); ++i) {
+ Instruction *In = Worklist[i];
+ for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
+ Instruction *OpI = dyn_cast<Instruction>(I);
+ if (!OpI)
+ continue;
+ BasicBlock *PB = OpI->getParent();
+ if (!LoopBlocks.count(PB))
+ continue;
+ Worklist.insert(OpI);
+ }
+ }
+
+ // Scan all instructions in the loop, if any of them have a user outside
+ // of the loop, or outside of the expressions collected above, then either
+ // the loop has a side-effect visible outside of it, or there are
+ // instructions in it that are not involved in the original set Insts.
+ for (auto *B : L->blocks()) {
+ for (auto &In : *B) {
+ if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
+ continue;
+ if (!Worklist.count(&In) && In.mayHaveSideEffects())
+ return false;
+ for (const auto &K : In.users()) {
+ Instruction *UseI = dyn_cast<Instruction>(K);
+ if (!UseI)
+ continue;
+ BasicBlock *UseB = UseI->getParent();
+ if (LF->getLoopFor(UseB) != L)
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
+ const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
+ return DT->dominates(BB, EB);
+ };
+ if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
+ return false;
+
+ bool MadeChange = false;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ SmallVector<StoreInst*,8> Stores;
+ collectStores(CurLoop, BB, Stores);
+
+ // Optimize the store into a memcpy, if it feeds a similarly strided load.
+ for (auto &SI : Stores)
+ MadeChange |= processCopyingStore(CurLoop, SI, BECount);
+
+ return MadeChange;
+}
+
+
+bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
+ PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
+ if (PMR.recognize())
+ return true;
+
+ if (!HasMemcpy && !HasMemmove)
+ return false;
+
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ assert(!isa<SCEVCouldNotCompute>(BECount) &&
+ "runOnCountableLoop() called on a loop without a predictable"
+ "backedge-taken count");
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ bool Changed = false;
+
+ // Scan all the blocks in the loop that are not in subloops.
+ for (auto *BB : L->getBlocks()) {
+ // Ignore blocks in subloops.
+ if (LF->getLoopFor(BB) != L)
+ continue;
+ Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ const Module &M = *L->getHeader()->getParent()->getParent();
+ if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
+ return false;
+
+ if (skipLoop(L))
+ return false;
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (!L->getLoopPreheader())
+ return false;
+
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy" || Name == "memmove")
+ return false;
+
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DL = &L->getHeader()->getModule()->getDataLayout();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ HasMemcpy = TLI->has(LibFunc_memcpy);
+ HasMemmove = TLI->has(LibFunc_memmove);
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop(L);
+ return false;
+}
+
+
+Pass *llvm::createHexagonLoopIdiomPass() {
+ return new HexagonLoopIdiomRecognize();
+}
+
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index a5dc002642c8..7189b5a52c42 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -109,11 +109,14 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_RegisterMask:
+ continue;
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
+ if (MO.isImplicit())
+ continue;
MCO = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_FPImmediate: {
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 9ff9d93ea0c3..20dc9b0da1db 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -74,7 +74,9 @@ bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
return false;
// TypeXTYPE are 64 bit operations.
- if (HII.getType(*Inst2.getInstr()) == HexagonII::TypeXTYPE)
+ unsigned Type = HII.getType(*Inst2.getInstr());
+ if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
+ Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM)
return true;
return false;
}
diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
new file mode 100644
index 000000000000..0b4ac14c7a47
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
@@ -0,0 +1,204 @@
+//===--- HexagonMapAsm2IntrinV62.gen.td -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+multiclass T_VR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2),
+ (MI VectorRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
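+// As an illustration, the "defm : T_VR_HVX_gen_pat <V6_vlsrb,
+// int_hexagon_V6_vlsrb>" line further below expands to two selection
+// patterns: one mapping the intrinsic to V6_vlsrb in the 64-byte
+// single-vector mode (UseHVXSgl), and one mapping
+// int_hexagon_V6_vlsrb_128B to V6_vlsrb_128B in the 128-byte double-vector
+// mode (UseHVXDbl).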
+
+multiclass T_VVL_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegsLow8:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, IntRegsLow8:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegsLow8:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegsLow8:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VV_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2),
+ (MI VectorRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WW_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVV_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2),
+ (MI VecDblRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2, IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_ZR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2),
+ (MI VecPredRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VZR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VecPredRegs128B:$src2, IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VecPredRegs128B:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_ZV_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2),
+ (MI VecPredRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_R_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID IntRegs:$src1),
+ (MI IntRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1),
+ (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_ZZ_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2),
+ (MI VecPredRegs:$src1, VecPredRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, VecPredRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, VecPredRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVI_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVVI_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVVI_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+def : T_R_pat <S6_vsplatrbp, int_hexagon_S6_vsplatrbp>;
+def : T_PP_pat <M6_vabsdiffb, int_hexagon_M6_vabsdiffb>;
+def : T_PP_pat <M6_vabsdiffub, int_hexagon_M6_vabsdiffub>;
+def : T_PP_pat <S6_vtrunehb_ppp, int_hexagon_S6_vtrunehb_ppp>;
+def : T_PP_pat <S6_vtrunohb_ppp, int_hexagon_S6_vtrunohb_ppp>;
+
+defm : T_VR_HVX_gen_pat <V6_vlsrb, int_hexagon_V6_vlsrb>;
+defm : T_VR_HVX_gen_pat <V6_vmpyiwub, int_hexagon_V6_vmpyiwub>;
+defm : T_VVL_HVX_gen_pat <V6_vasrwuhrndsat, int_hexagon_V6_vasrwuhrndsat>;
+defm : T_VVL_HVX_gen_pat <V6_vasruwuhrndsat, int_hexagon_V6_vasruwuhrndsat>;
+defm : T_VVL_HVX_gen_pat <V6_vasrhbsat, int_hexagon_V6_vasrhbsat>;
+defm : T_VVL_HVX_gen_pat <V6_vlutvvb_nm, int_hexagon_V6_vlutvvb_nm>;
+defm : T_VVL_HVX_gen_pat <V6_vlutvwh_nm, int_hexagon_V6_vlutvwh_nm>;
+defm : T_VV_HVX_gen_pat <V6_vrounduwuh, int_hexagon_V6_vrounduwuh>;
+defm : T_VV_HVX_gen_pat <V6_vrounduhub, int_hexagon_V6_vrounduhub>;
+defm : T_VV_HVX_gen_pat <V6_vadduwsat, int_hexagon_V6_vadduwsat>;
+defm : T_VV_HVX_gen_pat <V6_vsubuwsat, int_hexagon_V6_vsubuwsat>;
+defm : T_VV_HVX_gen_pat <V6_vaddbsat, int_hexagon_V6_vaddbsat>;
+defm : T_VV_HVX_gen_pat <V6_vsubbsat, int_hexagon_V6_vsubbsat>;
+defm : T_VV_HVX_gen_pat <V6_vaddububb_sat, int_hexagon_V6_vaddububb_sat>;
+defm : T_VV_HVX_gen_pat <V6_vsubububb_sat, int_hexagon_V6_vsubububb_sat>;
+defm : T_VV_HVX_gen_pat <V6_vmpyewuh_64, int_hexagon_V6_vmpyewuh_64>;
+defm : T_VV_HVX_gen_pat <V6_vmaxb, int_hexagon_V6_vmaxb>;
+defm : T_VV_HVX_gen_pat <V6_vminb, int_hexagon_V6_vminb>;
+defm : T_VV_HVX_gen_pat <V6_vsatuwuh, int_hexagon_V6_vsatuwuh>;
+defm : T_VV_HVX_gen_pat <V6_vaddclbw, int_hexagon_V6_vaddclbw>;
+defm : T_VV_HVX_gen_pat <V6_vaddclbh, int_hexagon_V6_vaddclbh>;
+defm : T_WW_HVX_gen_pat <V6_vadduwsat_dv, int_hexagon_V6_vadduwsat_dv>;
+defm : T_WW_HVX_gen_pat <V6_vsubuwsat_dv, int_hexagon_V6_vsubuwsat_dv>;
+defm : T_WW_HVX_gen_pat <V6_vaddbsat_dv, int_hexagon_V6_vaddbsat_dv>;
+defm : T_WW_HVX_gen_pat <V6_vsubbsat_dv, int_hexagon_V6_vsubbsat_dv>;
+defm : T_WVV_HVX_gen_pat <V6_vaddhw_acc, int_hexagon_V6_vaddhw_acc>;
+defm : T_WVV_HVX_gen_pat <V6_vadduhw_acc, int_hexagon_V6_vadduhw_acc>;
+defm : T_WVV_HVX_gen_pat <V6_vaddubh_acc, int_hexagon_V6_vaddubh_acc>;
+defm : T_WVV_HVX_gen_pat <V6_vmpyowh_64_acc, int_hexagon_V6_vmpyowh_64_acc>;
+defm : T_WR_HVX_gen_pat <V6_vmpauhb, int_hexagon_V6_vmpauhb>;
+defm : T_WWR_HVX_gen_pat <V6_vmpauhb_acc, int_hexagon_V6_vmpauhb_acc>;
+defm : T_VVR_HVX_gen_pat <V6_vmpyiwub_acc, int_hexagon_V6_vmpyiwub_acc>;
+defm : T_ZR_HVX_gen_pat <V6_vandnqrt, int_hexagon_V6_vandnqrt>;
+defm : T_VZR_HVX_gen_pat <V6_vandnqrt_acc, int_hexagon_V6_vandnqrt_acc>;
+defm : T_ZV_HVX_gen_pat <V6_vandvqv, int_hexagon_V6_vandvqv>;
+defm : T_ZV_HVX_gen_pat <V6_vandvnqv, int_hexagon_V6_vandvnqv>;
+defm : T_R_HVX_gen_pat <V6_pred_scalar2v2, int_hexagon_V6_pred_scalar2v2>;
+defm : T_R_HVX_gen_pat <V6_lvsplath, int_hexagon_V6_lvsplath>;
+defm : T_R_HVX_gen_pat <V6_lvsplatb, int_hexagon_V6_lvsplatb>;
+defm : T_ZZ_HVX_gen_pat <V6_shuffeqw, int_hexagon_V6_shuffeqw>;
+defm : T_ZZ_HVX_gen_pat <V6_shuffeqh, int_hexagon_V6_shuffeqh>;
+defm : T_VVI_HVX_gen_pat <V6_vlutvvbi, int_hexagon_V6_vlutvvbi>;
+defm : T_VVI_HVX_gen_pat <V6_vlutvwhi, int_hexagon_V6_vlutvwhi>;
+defm : T_VVVI_HVX_gen_pat <V6_vlutvvb_oracci, int_hexagon_V6_vlutvvb_oracci>;
+defm : T_WVVI_HVX_gen_pat <V6_vlutvwh_oracci, int_hexagon_V6_vlutvwh_oracci>;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 72d8011277e6..d73fc7c73185 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -130,6 +130,8 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
if (II->getOpcode() == TargetOpcode::KILL)
return false;
+ if (II->isImplicitDef())
+ return false;
   // Make sure there is no 'def' or 'use' of any of the uses of the
   // feeder insn between its definition, this MI and jump, jmpInst
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 983310571563..f87a1b8e424d 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -1,298 +1,33 @@
-//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
+//===--- HexagonOperands.td -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illnois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; }
-def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; }
-def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; }
-def s8_0Imm64Operand : AsmOperandClass { let Name = "s8_0Imm64"; }
-def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; }
-def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; }
-def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; }
-def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; }
-def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; }
-def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; }
-def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; }
-def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm"; }
-def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; }
-def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; }
-def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; }
-def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; }
-def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; }
-def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; }
-def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; }
-def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; }
-def u9_0ImmOperand : AsmOperandClass { let Name = "u9_0Imm"; }
-def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; }
-def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; }
-def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; }
-def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; }
-def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; }
-def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; }
-def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; }
-def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; }
-def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; }
-def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; }
-def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; }
-def n8_0ImmOperand : AsmOperandClass { let Name = "n8_0Imm"; }
-// Immediate operands.
-
-let OperandType = "OPERAND_IMMEDIATE",
- DecoderMethod = "unsignedImmDecoder" in {
- def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand;
- let DecoderMethod = "s32_0ImmDecoder"; }
- def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; }
- def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand;
- let DecoderMethod = "s8_0ImmDecoder"; }
- def s8_0Imm64 : Operand<i64> { let ParserMatchClass = s8_0Imm64Operand;
- let DecoderMethod = "s8_0ImmDecoder"; }
- def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand;
- let DecoderMethod = "s6_0ImmDecoder"; }
- def s6_3Imm : Operand<i32>;
- def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand;
- let DecoderMethod = "s4_0ImmDecoder"; }
- def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand;
- let DecoderMethod = "s4_1ImmDecoder"; }
- def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand;
- let DecoderMethod = "s4_2ImmDecoder"; }
- def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand;
- let DecoderMethod = "s4_3ImmDecoder"; }
- def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; }
- def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; }
- def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; }
- def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; }
- def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; }
- def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; }
- def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; }
- def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; }
- def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; }
- def u9_0Imm : Operand<i32> { let ParserMatchClass = u9_0ImmOperand; }
- def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; }
- def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; }
- def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; }
- def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; }
- def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; }
- def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; }
- def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; }
- def u5_1Imm : Operand<i32>;
- def u5_2Imm : Operand<i32>;
- def u5_3Imm : Operand<i32>;
- def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; }
- def u4_1Imm : Operand<i32>;
- def u4_2Imm : Operand<i32>;
- def u4_3Imm : Operand<i32>;
- def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; }
- def u3_1Imm : Operand<i32>;
- def u3_2Imm : Operand<i32>;
- def u3_3Imm : Operand<i32>;
- def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; }
- def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; }
- def n8_0Imm : Operand<i32> { let ParserMatchClass = n8_0ImmOperand; }
-}
-
-let OperandType = "OPERAND_IMMEDIATE" in {
- def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand;
- let PrintMethod = "prints4_6ImmOperand";
- let DecoderMethod = "s4_6ImmDecoder";}
- def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand";
- let DecoderMethod = "s4_6ImmDecoder";}
- def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand;
- let PrintMethod = "prints3_6ImmOperand";
- let DecoderMethod = "s3_6ImmDecoder";}
- def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand";
- let DecoderMethod = "s3_6ImmDecoder";}
-}
-def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; }
-def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; }
-
-//
-// Immediate predicates
-//
-def s32_0ImmPred : PatLeaf<(i32 imm), [{
+def f32ImmOperand : AsmOperandClass { let Name = "f32Imm"; }
+def f32Imm : Operand<f32> { let ParserMatchClass = f32ImmOperand; }
+def f64ImmOperand : AsmOperandClass { let Name = "f64Imm"; }
+def f64Imm : Operand<f64> { let ParserMatchClass = f64ImmOperand; }
+def s8_0Imm64Pred : PatLeaf<(i64 imm), [{ return isInt<8>(N->getSExtValue()); }]>;
+def s9_0ImmOperand : AsmOperandClass { let Name = "s9_0Imm"; }
+def s9_0Imm : Operand<i32> { let ParserMatchClass = s9_0ImmOperand; }
+def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; }
+def r32_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
}]>;
-
-def s31_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<31,1>(v);
-}]>;
-
-def s30_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<30,2>(v);
-}]>;
-
-def s29_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<29,3>(v);
-}]>;
-
-def s10_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<10>(v);
-}]>;
-
-def s8_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-def s8_0Imm64Pred : PatLeaf<(i64 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-def s6_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<6>(v);
-}]>;
-
-def s4_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<4>(v);
-}]>;
-
-def s4_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,1>(v);
-}]>;
-
-def s4_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,2>(v);
-}]>;
-
-def s4_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,3>(v);
-}]>;
-
-def u32_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
-}]>;
-
-def u16_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<16>(v);
-}]>;
-
-def u11_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<11,3>(v);
-}]>;
-
def u9_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
}]>;
-
-def u8_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<8>(v);
-}]>;
-
-def u6_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}]>;
-
-def u6_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}]>;
-
-def u6_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}]>;
-
-def u5_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<5>(v);
-}]>;
-
-def u4_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<4>(v);
-}]>;
-
-def u3_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<3>(v);
-}]>;
-
-def u2_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<2>(v);
-}]>;
-
-// Extendable immediate operands.
-def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; }
-def s16_0ExtOperand : AsmOperandClass { let Name = "s16_0Ext"; }
-def s12_0ExtOperand : AsmOperandClass { let Name = "s12_0Ext"; }
-def s10_0ExtOperand : AsmOperandClass { let Name = "s10_0Ext"; }
-def s9_0ExtOperand : AsmOperandClass { let Name = "s9_0Ext"; }
-def s8_0ExtOperand : AsmOperandClass { let Name = "s8_0Ext"; }
-def s7_0ExtOperand : AsmOperandClass { let Name = "s7_0Ext"; }
-def s6_0ExtOperand : AsmOperandClass { let Name = "s6_0Ext"; }
-def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; }
-def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; }
-def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; }
-def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; }
-def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; }
-def u7_0ExtOperand : AsmOperandClass { let Name = "u7_0Ext"; }
-def u8_0ExtOperand : AsmOperandClass { let Name = "u8_0Ext"; }
-def u9_0ExtOperand : AsmOperandClass { let Name = "u9_0Ext"; }
-def u10_0ExtOperand : AsmOperandClass { let Name = "u10_0Ext"; }
-def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; }
-def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; }
-def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; }
-def u32_0MustExtOperand : AsmOperandClass { let Name = "u32_0MustExt"; }
-
-
-
-let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand",
- DecoderMethod = "unsignedImmDecoder" in {
- def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; }
- def s16_0Ext : Operand<i32> { let ParserMatchClass = s16_0ExtOperand;
- let DecoderMethod = "s16_0ImmDecoder"; }
- def s12_0Ext : Operand<i32> { let ParserMatchClass = s12_0ExtOperand;
- let DecoderMethod = "s12_0ImmDecoder"; }
- def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand;
- let DecoderMethod = "s11_0ImmDecoder"; }
- def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand;
- let DecoderMethod = "s11_1ImmDecoder"; }
- def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand;
- let DecoderMethod = "s11_2ImmDecoder"; }
- def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand;
- let DecoderMethod = "s11_3ImmDecoder"; }
- def s10_0Ext : Operand<i32> { let ParserMatchClass = s10_0ExtOperand;
- let DecoderMethod = "s10_0ImmDecoder"; }
- def s9_0Ext : Operand<i32> { let ParserMatchClass = s9_0ExtOperand;
- let DecoderMethod = "s9_0ImmDecoder"; }
- def s8_0Ext : Operand<i32> { let ParserMatchClass = s8_0ExtOperand;
- let DecoderMethod = "s8_0ImmDecoder"; }
- def s7_0Ext : Operand<i32> { let ParserMatchClass = s7_0ExtOperand; }
- def s6_0Ext : Operand<i32> { let ParserMatchClass = s6_0ExtOperand;
- let DecoderMethod = "s6_0ImmDecoder"; }
- def u7_0Ext : Operand<i32> { let ParserMatchClass = u7_0ExtOperand; }
- def u8_0Ext : Operand<i32> { let ParserMatchClass = u8_0ExtOperand; }
- def u9_0Ext : Operand<i32> { let ParserMatchClass = u9_0ExtOperand; }
- def u10_0Ext : Operand<i32> { let ParserMatchClass = u10_0ExtOperand; }
- def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; }
- def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; }
- def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; }
- def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; }
- def u32_0MustExt : Operand<i32> { let ParserMatchClass = u32_0MustExtOperand; }
-}
-
+def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm"; let RenderMethod = "addImmOperands"; }
+def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; }
+def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; }
+def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; }
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
@@ -305,28 +40,6 @@ def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
-// Address operands.
-
-let PrintMethod = "printGlobalOperand" in {
- def globaladdress : Operand<i32>;
- def globaladdressExt : Operand<i32>;
-}
-
-let PrintMethod = "printJumpTable" in
-def jumptablebase : Operand<i32>;
-
-def brtarget : Operand<OtherVT> {
- let DecoderMethod = "brtargetDecoder";
- let PrintMethod = "printBrtarget";
-}
-def brtargetExt : Operand<OtherVT> {
- let DecoderMethod = "brtargetDecoder";
- let PrintMethod = "printBrtarget";
-}
-def calltarget : Operand<i32> {
- let DecoderMethod = "brtargetDecoder";
- let PrintMethod = "printBrtarget";
-}
def bblabel : Operand<i32>;
def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 89db46799cb3..b243de317dc5 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -208,7 +208,16 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA,
NodeAddr<UseNode *> UN = *I;
RegisterRef UR = UN.Addr->getRegRef(*DFG);
NodeSet Visited, Defs;
- const auto &ReachingDefs = LV->getAllReachingDefsRec(UR, UN, Visited, Defs);
+ const auto &P = LV->getAllReachingDefsRec(UR, UN, Visited, Defs);
+ if (!P.second) {
+ DEBUG({
+ dbgs() << "*** Unable to collect all reaching defs for use ***\n"
+ << PrintNode<UseNode*>(UN, *DFG) << '\n'
+ << "The program's complexity may exceed the limits.\n";
+ });
+ return false;
+ }
+ const auto &ReachingDefs = P.first;
if (ReachingDefs.size() > 1) {
DEBUG({
dbgs() << "*** Multiple Reaching Defs found!!! ***\n";
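The early exit added above keys off the second element of the pair returned by getAllReachingDefsRec: judging by the debug message, it is false when the query gave up under its complexity limits, so the partial def set in the first element cannot be trusted. A minimal self-contained C++ sketch of that pattern follows; queryDefs is a hypothetical stand-in, not the RDF API.

#include <cstdio>
#include <set>
#include <utility>

// Stand-in for the reaching-defs query: returns {defs, complete}.
static std::pair<std::set<unsigned>, bool> queryDefs(bool hitLimit) {
  std::set<unsigned> Defs = {42};
  return {Defs, !hitLimit};   // second == false means the answer is incomplete
}

// Mirrors the check above: reject the candidate unless the query completed
// and produced at most one reaching def.
static bool candidateIsValid(bool hitLimit) {
  auto P = queryDefs(hitLimit);
  if (!P.second)
    return false;             // truncated def set: bail out conservatively
  return P.first.size() <= 1;
}

int main() {
  std::printf("%d %d\n", candidateIsValid(false), candidateIsValid(true)); // prints "1 0"
}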
@@ -230,7 +239,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG->IsDef, *DFG)) {
DEBUG(dbgs() << "\t\t[DefNode]: " << Print<NodeAddr<DefNode *>>(DA, *DFG)
<< "\n");
- RegisterRef DR = DFG->normalizeRef(DA.Addr->getRegRef(*DFG));
+ RegisterRef DR = DFG->getPRI().normalize(DA.Addr->getRegRef(*DFG));
auto UseSet = LV->getAllReachedUses(DR, DA);
@@ -250,7 +259,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
<< Print<Liveness::RefMap>(phiUse, *DFG) << "\n");
if (!phiUse.empty()) {
for (auto I : phiUse) {
- if (DR.Reg != I.first)
+ if (!DFG->getPRI().alias(RegisterRef(I.first), DR))
continue;
auto phiUseSet = I.second;
for (auto phiUI : phiUseSet) {
@@ -333,17 +342,17 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->getBaseWithLongOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(0));
- MIB.addOperand(OldMI->getOperand(2));
- MIB.addOperand(OldMI->getOperand(3));
- MIB.addOperand(ImmOp);
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(OldMI->getOperand(2));
+ MIB.add(OldMI->getOperand(3));
+ MIB.add(ImmOp);
OpStart = 4;
Changed = true;
} else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
short NewOpCode = HII->getAbsoluteForm(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode))
- .addOperand(OldMI->getOperand(0));
+ .add(OldMI->getOperand(0));
const GlobalValue *GV = ImmOp.getGlobal();
int64_t Offset = ImmOp.getOffset() + OldMI->getOperand(2).getImm();
@@ -359,9 +368,9 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->xformRegToImmOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(0));
- MIB.addOperand(OldMI->getOperand(1));
- MIB.addOperand(ImmOp);
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(OldMI->getOperand(1));
+ MIB.add(ImmOp);
OpStart = 4;
Changed = true;
DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
@@ -370,7 +379,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
if (Changed)
for (unsigned i = OpStart; i < OpEnd; ++i)
- MIB.addOperand(OldMI->getOperand(i));
+ MIB.add(OldMI->getOperand(i));
return Changed;
}
@@ -390,10 +399,10 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->getBaseWithLongOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(1));
- MIB.addOperand(OldMI->getOperand(2));
- MIB.addOperand(ImmOp);
- MIB.addOperand(OldMI->getOperand(3));
+ MIB.add(OldMI->getOperand(1));
+ MIB.add(OldMI->getOperand(2));
+ MIB.add(ImmOp);
+ MIB.add(OldMI->getOperand(3));
OpStart = 4;
} else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
short NewOpCode = HII->getAbsoluteForm(*OldMI);
@@ -402,7 +411,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
const GlobalValue *GV = ImmOp.getGlobal();
int64_t Offset = ImmOp.getOffset() + OldMI->getOperand(1).getImm();
MIB.addGlobalAddress(GV, Offset, ImmOp.getTargetFlags());
- MIB.addOperand(OldMI->getOperand(2));
+ MIB.add(OldMI->getOperand(2));
OpStart = 3;
}
Changed = true;
@@ -412,9 +421,9 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->xformRegToImmOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(0));
- MIB.addOperand(ImmOp);
- MIB.addOperand(OldMI->getOperand(1));
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(ImmOp);
+ MIB.add(OldMI->getOperand(1));
OpStart = 2;
Changed = true;
DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
@@ -422,7 +431,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
}
if (Changed)
for (unsigned i = OpStart; i < OpEnd; ++i)
- MIB.addOperand(OldMI->getOperand(i));
+ MIB.add(OldMI->getOperand(i));
return Changed;
}
@@ -473,26 +482,26 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
BuildMI(*BB, InsertPt, UseMI->getDebugLoc(), HII->get(NewOpCode));
// change mem(Rs + # ) -> mem(Rt << # + ##)
if (UseMID.mayLoad()) {
- MIB.addOperand(UseMI->getOperand(0));
- MIB.addOperand(AddAslMI->getOperand(2));
- MIB.addOperand(AddAslMI->getOperand(3));
+ MIB.add(UseMI->getOperand(0));
+ MIB.add(AddAslMI->getOperand(2));
+ MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(),
ImmOp.getTargetFlags());
OpStart = 3;
} else if (UseMID.mayStore()) {
- MIB.addOperand(AddAslMI->getOperand(2));
- MIB.addOperand(AddAslMI->getOperand(3));
+ MIB.add(AddAslMI->getOperand(2));
+ MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(),
ImmOp.getTargetFlags());
- MIB.addOperand(UseMI->getOperand(2));
+ MIB.add(UseMI->getOperand(2));
OpStart = 3;
} else
llvm_unreachable("Unhandled instruction");
for (unsigned i = OpStart; i < OpEnd; ++i)
- MIB.addOperand(UseMI->getOperand(i));
+ MIB.add(UseMI->getOperand(i));
Deleted.insert(UseMI);
}
@@ -617,7 +626,7 @@ bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) {
for (NodeAddr<InstrNode *> IA : BA.Addr->members(*DFG)) {
updateMap(IA);
- DFG->pushDefs(IA, DefM);
+ DFG->pushAllDefs(IA, DefM);
}
MachineDomTreeNode *N = MDT->getNode(B);
@@ -629,6 +638,9 @@ bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) {
}
bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
bool Changed = false;
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &MRI = MF.getRegInfo();
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index ad81287007e6..b8c3bf0745ce 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -17,6 +17,16 @@ def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
def IsOrAdd: PatFrag<(ops node:$Addr, node:$off),
(or node:$Addr, node:$off), [{ return isOrEquivalentToAdd(N); }]>;
+def Iss4_6 : PatLeaf<(i32 imm), [{
+ int32_t V = N->getSExtValue();
+ return isShiftedInt<4,6>(V);
+}]>;
+
+def Iss4_7 : PatLeaf<(i32 imm), [{
+ int32_t V = N->getSExtValue();
+ return isShiftedInt<4,7>(V);
+}]>;
+
def IsPow2_32 : PatLeaf<(i32 imm), [{
uint32_t V = N->getZExtValue();
return isPowerOf2_32(V);
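Iss4_6 and Iss4_7 accept a signed 4-bit value scaled by 64 or 128 respectively (multiples of 64 in [-512, 448], multiples of 128 in [-1024, 896]), which lines up with the 64-byte and 128-byte HVX vector sizes used by the vS32b/vL32b offset-folding patterns later in this file. A standalone C++ sketch of the check, assuming the usual isShiftedInt<N,S> semantics; isShiftedIntNS below is a local helper, not the LLVM function.

#include <cassert>
#include <cstdint>

// True if V is a signed N-bit value scaled by 2^S, the property the
// Iss4_6/Iss4_7 PatLeafs test with isShiftedInt<4,6> and <4,7>.
static bool isShiftedIntNS(int64_t V, unsigned N, unsigned S) {
  int64_t Scale = int64_t(1) << S;
  if (V % Scale != 0)
    return false;
  int64_t Scaled = V / Scale;
  return Scaled >= -(int64_t(1) << (N - 1)) && Scaled < (int64_t(1) << (N - 1));
}

int main() {
  assert(isShiftedIntNS(448, 4, 6));    //  7 * 64: largest Iss4_6 offset
  assert(isShiftedIntNS(-512, 4, 6));   // -8 * 64: smallest Iss4_6 offset
  assert(!isShiftedIntNS(96, 4, 6));    // not a multiple of 64
  assert(!isShiftedIntNS(512, 4, 6));   // multiple of 64 but out of range
  assert(isShiftedIntNS(-1024, 4, 7));  // -8 * 128: smallest Iss4_7 offset
  return 0;
}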
@@ -89,6 +99,11 @@ def LogN2_64 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32);
}]>;
+def ToZext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A4_combineir 0, (i32 $Rs)))>;
+def ToSext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A2_sxtw (i32 $Rs)))>;
+
class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
: Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)),
@@ -153,8 +168,12 @@ def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
def: Pat<(not I32:$src1),
(A2_subri -1, IntRegs:$src1)>;
+def TruncI64ToI32: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
-def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
+def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>;
def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs),
(C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
@@ -274,7 +293,7 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
-def: Pat<(br bb:$dst), (J2_jump brtarget:$dst)>;
+def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>;
def: Pat<(brcond I1:$src1, bb:$block), (J2_jumpt PredRegs:$src1, bb:$block)>;
def: Pat<(brind I32:$dst), (J2_jumpr IntRegs:$dst)>;
@@ -695,8 +714,8 @@ def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
 // Map TLS addresses to A2_tfrsi.
-def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
-def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s32_0Imm:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s32_0Imm:$label)>;
def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
def: Pat<(i1 0), (PS_false)>;
@@ -898,26 +917,35 @@ def: Pat<(i1 (setule I64:$src1, I64:$src2)),
(C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
// Sign extends.
-// i1 -> i32
-def: Pat<(i32 (sext I1:$src1)),
- (C2_muxii PredRegs:$src1, -1, 0)>;
+// sext i1->i32
+def: Pat<(i32 (sext I1:$Pu)),
+ (C2_muxii I1:$Pu, -1, 0)>;
-// i1 -> i64
-def: Pat<(i64 (sext I1:$src1)),
- (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
+// sext i1->i64
+def: Pat<(i64 (sext I1:$Pu)),
+ (A2_combinew (C2_muxii PredRegs:$Pu, -1, 0),
+ (C2_muxii PredRegs:$Pu, -1, 0))>;
// Zero extends.
-// i1 -> i32
-def: Pat<(i32 (zext I1:$src1)),
- (C2_muxii PredRegs:$src1, 1, 0)>;
+// zext i1->i32
+def: Pat<(i32 (zext I1:$Pu)),
+ (C2_muxii PredRegs:$Pu, 1, 0)>;
+
+// zext i1->i64
+def: Pat<(i64 (zext I1:$Pu)),
+ (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>;
+
+// zext i32->i64
+def: Pat<(Zext64 I32:$Rs),
+ (ToZext64 IntRegs:$Rs)>;
// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
-def: Pat<(i32 (anyext I1:$src1)),
- (C2_muxii PredRegs:$src1, 1, 0)>;
+def: Pat<(i32 (anyext I1:$Pu)),
+ (C2_muxii PredRegs:$Pu, 1, 0)>;
-// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
-def: Pat<(i64 (anyext I1:$src1)),
- (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
+// Map from Rss = Pd to Rdd = combine(#0, (mux(Pd, #1, #0)))
+def: Pat<(i64 (anyext I1:$Pu)),
+ (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>;
// Clear the sign bit in a 64-bit register.
def ClearSign : OutPatFrag<(ops node:$Rss),
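For the rewritten i1 extension patterns above: sign-extending a predicate yields all-ones or all-zeros, so both words of the i64 result are the same mux(Pu, -1, 0), while zero- and any-extension only need the mux in the low word with a zero high word (ToZext64 expands to A4_combineir 0). A trivial C++ sketch of the intended semantics:

#include <cassert>
#include <cstdint>

static int64_t sextI1(bool Pu) {   // sext i1 -> i64: combine(mux, mux)
  int32_t Mux = Pu ? -1 : 0;
  return (int64_t(uint32_t(Mux)) << 32) | uint32_t(Mux);
}

static int64_t zextI1(bool Pu) {   // zext/anyext i1 -> i64: combine(#0, mux(Pu, 1, 0))
  uint32_t Mux = Pu ? 1 : 0;
  return int64_t(Mux);             // high word is zero
}

int main() {
  assert(sextI1(true) == -1 && sextI1(false) == 0);
  assert(zextI1(true) == 1 && zextI1(false) == 0);
  return 0;
}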
@@ -1244,11 +1272,6 @@ def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8),
}
-def ToZext64: OutPatFrag<(ops node:$Rs),
- (i64 (A4_combineir 0, (i32 $Rs)))>;
-def ToSext64: OutPatFrag<(ops node:$Rs),
- (i64 (A2_sxtw (i32 $Rs)))>;
-
// Patterns to generate indexed loads with different forms of the address:
// - frameindex,
// - base + offset,
@@ -1349,14 +1372,6 @@ let AddedComplexity = 20 in {
def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
}
-// zext i1->i64
-def: Pat<(i64 (zext I1:$src1)),
- (ToZext64 (C2_muxii PredRegs:$src1, 1, 0))>;
-
-// zext i32->i64
-def: Pat<(Zext64 I32:$src1),
- (ToZext64 IntRegs:$src1)>;
-
let AddedComplexity = 40 in
multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
PatFrag stOp> {
@@ -1587,6 +1602,15 @@ def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>;
def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>;
def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>;
+def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>;
+def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>;
+
+def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>;
+def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>;
+
+def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>;
+def: Pat<(bswap I64:$Rss), (A2_combinew (A2_swiz (LoReg $Rss)),
+ (A2_swiz (HiReg $Rss)))>;
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
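The new ctpop and bswap selections above build on 64-bit primitives: i32 ctpop zero-extends into a register pair (A4_combineir 0, Rs) and counts bits with S5_popcountp, and i64 bswap byte-swaps each 32-bit half with A2_swiz and exchanges the halves with A2_combinew. A small C++ check of that bswap decomposition; bswap32 below is just a local helper, not a Hexagon primitive.

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0x0000FF00u) |
         ((X << 8) & 0x00FF0000u) | (X << 24);
}

// Mirrors the pattern: Rdd = combine(swiz(LoReg), swiz(HiReg)) -- the swapped
// low word becomes the high word of the result and vice versa.
static uint64_t bswap64(uint64_t X) {
  uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);
  return (uint64_t(bswap32(Lo)) << 32) | bswap32(Hi);
}

int main() {
  assert(bswap64(0x0102030405060708ULL) == 0x0807060504030201ULL);
  return 0;
}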
@@ -2235,12 +2259,6 @@ def ftoi : SDNodeXForm<fpimm, [{
def: Pat<(sra (i64 (add (sra I64:$src1, u6_0ImmPred:$src2), 1)), (i32 1)),
(S2_asr_i_p_rnd I64:$src1, imm:$src2)>;
-def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, i64>]>;
-def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
-
-def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>;
-
let AddedComplexity = 20 in {
defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
@@ -2718,17 +2736,6 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [
}]>;
-def s4_6ImmPred: PatLeaf<(i32 imm), [{
- int64_t V = N->getSExtValue();
- return isShiftedInt<4,6>(V);
-}]>;
-
-def s4_7ImmPred: PatLeaf<(i32 imm), [{
- int64_t V = N->getSExtValue();
- return isShiftedInt<4,7>(V);
-}]>;
-
-
multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned stores
def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
@@ -2749,25 +2756,25 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector store.
let AddedComplexity = 10 in {
def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_6:$offset)),
+ (V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset,
(VTSgl VectorRegs:$src1))>,
Requires<[UseHVXSgl]>;
def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_6:$offset)),
+ (V6_vS32Ub_ai IntRegs:$src2, Iss4_6:$offset,
(VTSgl VectorRegs:$src1))>,
Requires<[UseHVXSgl]>;
// Fold Add R+OFF into vector store 128B.
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_7:$offset)),
+ (V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset,
(VTDbl VectorRegs128B:$src1))>,
Requires<[UseHVXDbl]>;
def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_7:$offset)),
+ (V6_vS32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset,
(VTDbl VectorRegs128B:$src1))>,
Requires<[UseHVXDbl]>;
}
@@ -2798,18 +2805,18 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector load.
let AddedComplexity = 10 in {
- def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))),
+ (V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
- def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, Iss4_7:$offset))),
+ (V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
- def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))),
+ (V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>,
Requires<[UseHVXSgl]>;
- def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, Iss4_6:$offset))),
+ (V6_vL32Ub_ai IntRegs:$src2, Iss4_6:$offset)>,
Requires<[UseHVXSgl]>;
}
}
@@ -3253,8 +3260,8 @@ def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
(M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
- (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
- (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
+ (LoReg (S2_vtrunewh (A2_combineii 0, 0),
+ (vmpyh V2I16:$Rs, V2I16:$Rt)))>;
// Multiplies two v4i16 vectors.
def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
@@ -3345,3 +3352,11 @@ def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
(S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
+
+// Read cycle counter.
+//
+def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
+ [SDNPHasChain]>;
+
+def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
new file mode 100644
index 000000000000..5a720e794562
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -0,0 +1,537 @@
+//===--- HexagonPseudo.td -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+let PrintMethod = "printGlobalOperand" in {
+ def globaladdress : Operand<i32>;
+ def globaladdressExt : Operand<i32>;
+}
+
+let isPseudo = 1 in {
+let isCodeGenOnly = 0 in
+def A2_iconst : Pseudo<(outs IntRegs:$Rd32), (ins s23_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">;
+def DUPLEX_Pseudo : InstHexagon<(outs), (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>;
+}
+
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+ isAsmParserOnly = 1 in
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins s32_0Imm:$src1, s8_0Imm:$src2),
+ "$dst=combine(#$src1,#$src2)">;
+
+// HI/LO Instructions
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
+ hasNewValue = 1, opNewValue = 0 in
+class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp>
+ : InstHexagon<(outs IntRegs:$dst),
+ (ins u16_0Imm:$imm_value),
+ "$dst"#RegHalf#"=#$imm_value", [], "", ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, OpcodeHexagon {
+ bits<5> dst;
+ bits<32> imm_value;
+
+ let Inst{27} = Rs;
+ let Inst{26-24} = MajOp;
+ let Inst{21} = MinOp;
+ let Inst{20-16} = dst;
+ let Inst{23-22} = imm_value{15-14};
+ let Inst{13-0} = imm_value{13-0};
+}
+
+let isAsmParserOnly = 1 in {
+ def LO : REG_IMMED<".l", 0b0, 0b001, 0b1>;
+ def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>;
+}
+
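LO and HI write only the named 16-bit half of the destination ($dst.l / $dst.h), so a pair of them can presumably materialize an arbitrary 32-bit constant, each write leaving the other half untouched. A plain C++ illustration of that composition, for intuition only:

#include <cassert>
#include <cstdint>

static uint32_t setLow16(uint32_t Rd, uint16_t Imm) {   // Rd.l = #Imm
  return (Rd & 0xFFFF0000u) | Imm;
}
static uint32_t setHigh16(uint32_t Rd, uint16_t Imm) {  // Rd.h = #Imm
  return (Rd & 0x0000FFFFu) | (uint32_t(Imm) << 16);
}

int main() {
  uint32_t Rd = 0;
  Rd = setHigh16(Rd, 0xDEAD);
  Rd = setLow16(Rd, 0xBEEF);
  assert(Rd == 0xDEADBEEFu);
  return 0;
}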
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in {
+ def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v),
+ "$Rd = CONST32(#$v)", []>;
+ def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v),
+ "$Rd = CONST64(#$v)", []>;
+}
+
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>;
+
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ ".error \"should not emit\" ", []>;
+
+let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ ".error \"should not emit\" ", []>;
+
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : Endloop<(outs), (ins b30_2Imm:$offset),
+ ":endloop0",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC1], Uses = [SA1, LC1] in {
+def ENDLOOP1 : Endloop<(outs), (ins b30_2Imm:$offset),
+ ":endloop1",
+ []>;
+}
+
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, u10_0Imm:$src2),
+ #mnemonic#"($offset,#$src2)",
+ [], "" , CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<10> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b100100;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2{9-5};
+ let Inst{12-8} = offset{8-4};
+ let Inst{7-5} = src2{4-2};
+ let Inst{4-3} = offset{3-2};
+ let Inst{1-0} = src2{1-0};
+}
+
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2),
+ #mnemonic#"($offset,$src2)",
+ [], "" ,CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<5> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b000000;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2;
+ let Inst{12-8} = offset{8-4};
+ let Inst{4-3} = offset{3-2};
+ }
+
+multiclass LOOP_ri<string mnemonic> {
+ let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
+ def iext: LOOP_iBase<mnemonic, b30_2Imm, 1>;
+ def rext: LOOP_rBase<mnemonic, b30_2Imm, 1>;
+ }
+}
+
+
+let Defs = [SA0, LC0, USR] in
+defm J2_loop0 : LOOP_ri<"loop0">;
+
+// Interestingly, only loop0 appears to set usr.lpcfg.
+let Defs = [SA1, LC1] in
+defm J2_loop1 : LOOP_ri<"loop1">;
+
+let isCall = 1, hasSideEffects = 1, isPredicable = 0,
+ isExtended = 0, isExtendable = 1, opExtendable = 0,
+ isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
+class T_Call<string ExtStr>
+ : JInst<(outs), (ins a30_2Imm:$dst),
+ "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> {
+ let BaseOpcode = "call";
+ bits<24> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b101;
+ let Inst{24-16,13-1} = dst{23-2};
+ let Inst{0} = 0b0;
+}
+
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [R16],
+ isPredicable = 0 in
+def CALLProfile : T_Call<"">;
+
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
+ Defs = [PC, R31, R6, R7, P0] in
+def PS_call_stk : T_Call<"">;
+
+let isCall = 1, hasSideEffects = 1, cofMax1 = 1 in
+class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
+ dag InputDag = (ins IntRegs:$Rs)>
+ : JInst<(outs), InputDag,
+ !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
+ "if ($Pu) callr $Rs"),
+ "callr $Rs"),
+ [], "", J_tc_2early_SLOT2> {
+ bits<5> Rs;
+ bits<2> Pu;
+ let isPredicated = isPred;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b000;
+ let Inst{24-23} = !if (isPred, 0b10, 0b01);
+ let Inst{22} = 0;
+ let Inst{21} = isPredNot;
+ let Inst{9-8} = !if (isPred, Pu, 0b00);
+ let Inst{20-16} = Rs;
+
+ }
+
+let isCodeGenOnly = 1 in {
+ def PS_callr_nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
+}
+
+let isCall = 1, hasSideEffects = 1,
+ isExtended = 0, isExtendable = 1, opExtendable = 0, isCodeGenOnly = 1,
+ BaseOpcode = "PS_call_nr", isExtentSigned = 1, opExtentAlign = 2,
+ Itinerary = J_tc_2early_SLOT23 in
+class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops>
+ : Pseudo<(outs), iops, "">, PredRel {
+ bits<2> Pu;
+ bits<17> dst;
+ let opExtentBits = nbits;
+ let isPredicable = 0; // !if(isPred, 0, 1);
+ let isPredicated = 0; // isPred;
+ let isPredicatedFalse = isFalse;
+}
+
+def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii)>;
+//def PS_call_nrt: Call_nr<17, 1, 0, (ins PredRegs:$Pu, s32_0Imm:$dst)>;
+//def PS_call_nrf: Call_nr<17, 1, 1, (ins PredRegs:$Pu, s32_0Imm:$dst)>;
+
+let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC],
+ isPredicable = 1, hasSideEffects = 0, InputType = "reg",
+ cofMax1 = 1 in
+class T_JMPr
+ : InstHexagon<(outs), (ins IntRegs:$dst), "jumpr $dst", [],
+ "", J_tc_2early_SLOT2, TypeJ>, OpcodeHexagon {
+ bits<5> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-21} = 0b0010100;
+ let Inst{20-16} = dst;
+}
+
+// A return through builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
+ isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
+def EH_RETURN_JMPR : T_JMPr;
+
+// Indirect tail-call.
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def PS_tailcall_r : T_JMPr;
+
+//
+// Direct tail-calls.
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def PS_tailcall_i : Pseudo<(outs), (ins a30_2Imm:$dst), "", []>;
+
+let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
+def PS_aligna : Pseudo<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>;
+
+// Generate frameindex addresses. The main reason for the offset operand is
+// that every instruction that is allowed to have frame index as an operand
+// will then have that operand followed by an immediate operand (the offset).
+// This simplifies the frame-index elimination code.
+//
+let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def PS_fi : Pseudo<(outs IntRegs:$Rd),
+ (ins IntRegs:$fi, s32_0Imm:$off), "">;
+ def PS_fia : Pseudo<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">;
+}
+
+class CondStr<string CReg, bit True, bit New> {
+ string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
+}
+class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
+ string S = Mnemonic # !if(Taken, ":t", ":nt");
+}
+let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
+ hasSideEffects = 0, InputType = "reg", cofMax1 = 1 in
+class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
+ : InstHexagon<(outs), (ins PredRegs:$src, IntRegs:$dst),
+ CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
+ JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst",
+ [], "", J_tc_2early_SLOT2, TypeJ>, OpcodeHexagon {
+
+ let isTaken = isTak;
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = isPredNew;
+ bits<2> src;
+ bits<5> dst;
+
+ let IClass = 0b0101;
+
+ let Inst{27-22} = 0b001101;
+ let Inst{21} = PredNot;
+ let Inst{20-16} = dst;
+ let Inst{12} = isTak;
+ let Inst{11} = isPredNew;
+ let Inst{9-8} = src;
+}
+multiclass JMPR_Pred<bit PredNot> {
+ def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken
+ // Predicate new
+ def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
+ def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
+}
+multiclass JMPR_base<string BaseOp> {
+ let BaseOpcode = BaseOp in {
+ def NAME : T_JMPr;
+ defm t : JMPR_Pred<0>;
+ defm f : JMPR_Pred<1>;
+ }
+}
+let isTerminator = 1, hasSideEffects = 0, isReturn = 1, isCodeGenOnly = 1, isBarrier = 1 in
+defm PS_jmpret : JMPR_base<"JMPret">, PredNewRel;
+
+//defm V6_vtran2x2_map : HexagonMapping<(outs VectorRegs:$Vy32, VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, IntRegs:$Rt32), "vtrans2x2(${Vy32},${Vx32},${Rt32})", (V6_vshuff VectorRegs:$Vy32, VectorRegs:$Vx32, VectorRegs:$Vx32in, IntRegs:$Rt32)>;
+
+// The reason for the custom inserter is to record all ALLOCA instructions
+// in MachineFunctionInfo.
+let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in
+def PS_alloca: InstHexagon<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u32_0Imm:$A), "",
+ [], "", ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>;
+
+// Load predicate.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins IntRegs:$addr, s32_0Imm:$off),
+ ".error \"should not emit\"", []>;
+
+// Load modifier.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_mod : LDInst<(outs ModRegs:$dst),
+ (ins IntRegs:$addr, s32_0Imm:$off),
+ ".error \"should not emit\"", []>;
+
+// Vector load
+let Predicates = [HasV60T, UseHVX] in
+let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+ class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_LD,
+ IType type = TypeCVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+// Vector store
+let Predicates = [HasV60T, UseHVX] in
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_ST,
+ IType type = TypeCVI_VM_ST>
+: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+def PS_pselect : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
+
+let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
+ isPredicable = 1,
+ isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
+ opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in
+class T_JMP<string ExtStr>
+ : JInst_CJUMP_UCJUMP<(outs), (ins b30_2Imm:$dst),
+ "jump " # ExtStr # "$dst",
+ [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> {
+ bits<24> dst;
+ let IClass = 0b0101;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-16} = dst{23-15};
+ let Inst{13-1} = dst{14-2};
+}
+
+// Restore registers and dealloc return function call.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">;
+
+ let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_RET_JMP_V4_PIC : T_JMP<"">;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC : T_JMP<"">;
+ }
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC : T_Call<"">, PredRel;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC : T_Call<"">, PredRel;
+ }
+}
+
+// Save registers function call.
+let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
+ def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
+
+ let isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel;
+
+ let Defs = [P0] in
+ def SAVE_REGISTERS_CALL_V4STK : T_Call<"">, PredRel;
+
+ let Defs = [P0], isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4STK_EXT : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28] in
+ def SAVE_REGISTERS_CALL_V4_PIC : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28], isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4_EXT_PIC : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28, P0] in
+ def SAVE_REGISTERS_CALL_V4STK_PIC : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28, P0], isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<"">, PredRel;
+}
+
+// Vector load/store pseudos
+
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class STrivv_template<RegisterClass RC>
+ : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>;
+
+def PS_vstorerw_ai: STrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerwu_ai: STrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<RegisterClass RC>
+ : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>;
+
+def PS_vloadrw_ai: LDrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrwu_ai: LDrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+// Store vector predicate pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+ def PS_vstorerq_ai : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+ def PS_vstorerq_ai_128B : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+ def PS_vloadrq_ai : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+ def PS_vloadrq_ai_128B : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VA_DV,
+ IType type = TypeCVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+ def PS_vselect: VSELInst<(outs VectorRegs:$dst),
+ (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst),
+ (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3),
+ "", []>, Requires<[HasV60T,UseHVXDbl]>;
+ def PS_wselect: VSELInst<(outs VecDblRegs:$dst),
+ (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst),
+ (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3),
+ "", []>, Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Store predicate.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def STriw_pred : STInst<(outs),
+ (ins IntRegs:$addr, s32_0Imm:$off, PredRegs:$src1),
+ ".error \"should not emit\"", []>;
+// Store modifier.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def STriw_mod : STInst<(outs),
+ (ins IntRegs:$addr, s32_0Imm:$off, ModRegs:$src1),
+ ".error \"should not emit\"", []>;
+
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+ isAsmParserOnly = 1 in
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64_0Imm:$src1),
+ "$dst = #$src1">;
+
+// Hexagon doesn't have a vector multiply with C semantics.
+// Instead, generate a pseudo instruction that gets expanded into two
+// scalar MPYI instructions.
+// This is expanded by ExpandPostRAPseudos.
+let isPseudo = 1 in
+def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>;
+
+let isPseudo = 1 in
+def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [],
+ "$Rd = $Rx">;
+
+def DuplexIClass0: InstDuplex < 0 >;
+def DuplexIClass1: InstDuplex < 1 >;
+def DuplexIClass2: InstDuplex < 2 >;
+let isExtendable = 1 in {
+ def DuplexIClass3: InstDuplex < 3 >;
+ def DuplexIClass4: InstDuplex < 4 >;
+ def DuplexIClass5: InstDuplex < 5 >;
+ def DuplexIClass6: InstDuplex < 6 >;
+ def DuplexIClass7: InstDuplex < 7 >;
+}
+def DuplexIClass8: InstDuplex < 8 >;
+def DuplexIClass9: InstDuplex < 9 >;
+def DuplexIClassA: InstDuplex < 0xA >;
+def DuplexIClassB: InstDuplex < 0xB >;
+def DuplexIClassC: InstDuplex < 0xC >;
+def DuplexIClassD: InstDuplex < 0xD >;
+def DuplexIClassE: InstDuplex < 0xE >;
+def DuplexIClassF: InstDuplex < 0xF >;
diff --git a/lib/Target/Hexagon/HexagonRDFOpt.cpp b/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 30640e19ebac..b3aba50b5625 100644
--- a/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -94,7 +94,7 @@ struct HexagonDCE : public DeadCodeElimination {
bool HexagonCP::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
- auto mapRegs = [MI,&EM] (RegisterRef DstR, RegisterRef SrcR) -> void {
+ auto mapRegs = [&EM] (RegisterRef DstR, RegisterRef SrcR) -> void {
EM.insert(std::make_pair(DstR, SrcR));
};
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index d3f230d3f8a6..2a1bb63af789 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -36,6 +36,9 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
+
using namespace llvm;
HexagonRegisterInfo::HexagonRegisterInfo()
@@ -125,6 +128,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
case HexagonSubtarget::V5:
case HexagonSubtarget::V55:
case HexagonSubtarget::V60:
+ case HexagonSubtarget::V62:
return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
}
@@ -133,25 +137,47 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
+const uint32_t *HexagonRegisterInfo::getCallPreservedMask(
+ const MachineFunction &MF, CallingConv::ID) const {
+ return HexagonCSR_RegMask;
+}
+
+
BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
const {
BitVector Reserved(getNumRegs());
Reserved.set(Hexagon::R29);
Reserved.set(Hexagon::R30);
Reserved.set(Hexagon::R31);
- Reserved.set(Hexagon::PC);
- Reserved.set(Hexagon::D14);
- Reserved.set(Hexagon::D15);
- Reserved.set(Hexagon::LC0);
- Reserved.set(Hexagon::LC1);
- Reserved.set(Hexagon::SA0);
- Reserved.set(Hexagon::SA1);
- Reserved.set(Hexagon::UGP);
- Reserved.set(Hexagon::GP);
- Reserved.set(Hexagon::CS0);
- Reserved.set(Hexagon::CS1);
- Reserved.set(Hexagon::CS);
- Reserved.set(Hexagon::USR);
+ // Control registers.
+ Reserved.set(Hexagon::SA0); // C0
+ Reserved.set(Hexagon::LC0); // C1
+ Reserved.set(Hexagon::SA1); // C2
+ Reserved.set(Hexagon::LC1); // C3
+ Reserved.set(Hexagon::P3_0); // C4
+ Reserved.set(Hexagon::USR); // C8
+ Reserved.set(Hexagon::PC); // C9
+ Reserved.set(Hexagon::UGP); // C10
+ Reserved.set(Hexagon::GP); // C11
+ Reserved.set(Hexagon::CS0); // C12
+ Reserved.set(Hexagon::CS1); // C13
+ Reserved.set(Hexagon::UPCYCLELO); // C14
+ Reserved.set(Hexagon::UPCYCLEHI); // C15
+ Reserved.set(Hexagon::FRAMELIMIT); // C16
+ Reserved.set(Hexagon::FRAMEKEY); // C17
+ Reserved.set(Hexagon::PKTCOUNTLO); // C18
+ Reserved.set(Hexagon::PKTCOUNTHI); // C19
+ Reserved.set(Hexagon::UTIMERLO); // C30
+ Reserved.set(Hexagon::UTIMERHI); // C31
+ // Out of the control registers, only C8 is explicitly defined in
+ // HexagonRegisterInfo.td. If others are defined, make sure to add
+ // them here as well.
+ Reserved.set(Hexagon::C8);
+ Reserved.set(Hexagon::USR_OVF);
+
+ for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x))
+ markSuperRegs(Reserved, x);
+
return Reserved;
}
@@ -267,6 +293,3 @@ unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
return Hexagon::R6;
}
-
-#define GET_REGINFO_TARGET_DESC
-#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 1fb295b5bd8c..8a3f175b8488 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -35,7 +35,8 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF)
const override;
-
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index a75f3514dbd2..93ab2f731207 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -140,41 +140,54 @@ let Namespace = "Hexagon" in {
}
// Control registers.
- def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
- def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
- def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
- def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
- def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
- DwarfRegNum<[71]>;
- def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use
- def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>;
- def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>;
+ def SA0: Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
+ def LC0: Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
+ def SA1: Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
+ def LC1: Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
+ def P3_0: Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
+ DwarfRegNum<[71]>;
+ // When defining more Cn registers, make sure to explicitly mark them
+ // as reserved in HexagonRegisterInfo.cpp.
+ def C5: Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>;
+ def C6: Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>;
+ def C7: Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>;
// Define C8 separately and make it aliased with USR.
// The problem is that USR has subregisters (e.g. overflow). If USR was
// specified as a subregister of C9_8, it would imply that subreg_overflow
// and isub_lo can be composed, which leads to all kinds of issues
// with lane masks.
- def C8 : Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>;
- def PC : Rc<9, "pc">, DwarfRegNum<[76]>;
- def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
- def GP : Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>;
- def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
- def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
- def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
- def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
+ def C8: Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>;
+ def PC: Rc<9, "pc">, DwarfRegNum<[76]>;
+ def UGP: Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
+ def GP: Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>;
+ def CS0: Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
+ def CS1: Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
+ def UPCYCLELO: Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
+ def UPCYCLEHI: Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
+ def FRAMELIMIT: Rc<16, "framelimit", ["c16"]>, DwarfRegNum<[83]>;
+ def FRAMEKEY: Rc<17, "framekey", ["c17"]>, DwarfRegNum<[84]>;
+ def PKTCOUNTLO: Rc<18, "pktcountlo", ["c18"]>, DwarfRegNum<[85]>;
+ def PKTCOUNTHI: Rc<19, "pktcounthi", ["c19"]>, DwarfRegNum<[86]>;
+ def UTIMERLO: Rc<30, "utimerlo", ["c30"]>, DwarfRegNum<[97]>;
+ def UTIMERHI: Rc<31, "utimerhi", ["c31"]>, DwarfRegNum<[98]>;
}
// Control registers pairs.
let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
- def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
- def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
- def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>;
- def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>;
+ def C1_0: Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
+ def C3_2: Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
+ def C5_4: Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>;
+ def C7_6: Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>;
// Use C8 instead of USR as a subregister of C9_8.
- def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>;
- def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
- def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
- def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>;
+ def C9_8: Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>;
+ def C11_10: Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
+ def CS: Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
+ def UPCYCLE: Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI]>, DwarfRegNum<[80]>;
+ def C17_16: Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>;
+ def PKTCOUNT: Rcc<18, "c19:18", [PKTCOUNTLO, PKTCOUNTHI], ["pktcount"]>,
+ DwarfRegNum<[85]>;
+ def UTIMER: Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>,
+ DwarfRegNum<[97]>;
}
foreach i = 0-31 in {
@@ -219,6 +232,10 @@ def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
}
// Registers are listed in reverse order for allocation preference reasons.
+def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add R23, R22, R21, R20, R19, R18, R17,
+ R16, R7, R6, R5, R4, R3, R2, R1, R0)>;
+
def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
(add R7, R6, R5, R4, R3, R2, R1, R0)> ;
@@ -226,6 +243,10 @@ def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4),
(sequence "D%u", 6, 13), D5, D14, D15)>;
+def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
+ (add D11, D10, D9, D8, D3, D2, D1,
+ D0)>;
+
def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512,
(add (sequence "V%u", 0, 31))>;
@@ -259,28 +280,28 @@ def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>;
let Size = 32, isAllocatable = 0 in
def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
- (add LC0, SA0, LC1, SA1,
- P3_0, C5,
- M0, M1, C6, C7, C8, CS0, CS1, UPCL, UPCH,
- USR, UGP, GP, PC)>;
+ (add LC0, SA0, LC1, SA1, P3_0, C5, C6, C7,
+ C8, PC, UGP, GP, CS0, CS1, UPCYCLELO, UPCYCLEHI,
+ FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI,
+ M0, M1, USR)>;
let isAllocatable = 0 in
def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;
let Size = 64, isAllocatable = 0 in
def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64,
- (add C1_0, C3_2, C7_6, C9_8, C11_10, CS, UPC)>;
-
-def VolatileV3 {
- list<Register> Regs = [D0, D1, D2, D3, D4, D5, D6, D7,
- R28, R31,
- P0, P1, P2, P3,
- M0, M1,
- LC0, LC1, SA0, SA1, USR, USR_OVF, CS0, CS1,
- V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11,
- V12, V13, V14, V15, V16, V17, V18, V19, V20, V21,
- V22, V23, V24, V25, V26, V27, V28, V29, V30, V31,
- W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11,
- W12, W13, W14, W15,
- Q0, Q1, Q2, Q3];
-}
+ (add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE, C17_16,
+ PKTCOUNT, UTIMER)>;
+
+// These registers are new for v62 and onward.
+// The function RegisterMatchesArch() uses this list for validation.
+let isAllocatable = 0 in
+def V62Regs : RegisterClass<"Hexagon", [i32], 32,
+ (add FRAMELIMIT, FRAMEKEY, C17_16,
+ PKTCOUNTLO, PKTCOUNTHI, PKTCOUNT,
+ UTIMERLO, UTIMERHI, UTIMER)>;
+
+
+def HexagonCSR
+ : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23,
+ R24, R25, R26, R27)>;
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index 6e4987b7e4e3..9b5fbea04d18 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -21,4 +21,12 @@ include "HexagonScheduleV55.td"
//===----------------------------------------------------------------------===//
include "HexagonScheduleV60.td"
+include "HexagonIICScalar.td"
+include "HexagonIICHVX.td"
+
+//===----------------------------------------------------------------------===//
+// V62 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV62.td"
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 7416baab392c..880cc0a02b6a 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -61,15 +61,21 @@ def J_tc_2early_SLOT23 : InstrItinClass;
def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
def J_tc_2early_SLOT2 : InstrItinClass;
def LD_tc_ld_SLOT01 : InstrItinClass;
+def LD_tc_ld_pi_SLOT01 : InstrItinClass;
def LD_tc_ld_SLOT0 : InstrItinClass;
def LD_tc_3or4stall_SLOT0 : InstrItinClass;
def M_tc_2_SLOT23 : InstrItinClass;
+def M_tc_2_acc_SLOT23 : InstrItinClass;
def M_tc_3_SLOT23 : InstrItinClass;
def M_tc_1_SLOT23 : InstrItinClass;
def M_tc_3x_SLOT23 : InstrItinClass;
+def M_tc_3x_acc_SLOT23 : InstrItinClass;
def M_tc_3or4x_SLOT23 : InstrItinClass;
+def M_tc_3or4x_acc_SLOT23 : InstrItinClass;
def ST_tc_st_SLOT01 : InstrItinClass;
+def ST_tc_st_pi_SLOT01 : InstrItinClass;
def ST_tc_st_SLOT0 : InstrItinClass;
+def ST_tc_st_pi_SLOT0 : InstrItinClass;
def ST_tc_ld_SLOT0 : InstrItinClass;
def ST_tc_3stall_SLOT0 : InstrItinClass;
def S_2op_tc_1_SLOT23 : InstrItinClass;
@@ -131,21 +137,27 @@ def HexagonItinerariesV4 :
//Load
InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<LD_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
// M
InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
// Store
// ST
InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
// ST0
InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
// S
diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td
index b2a75f7200d7..06cbcb16abb7 100644
--- a/lib/Target/Hexagon/HexagonScheduleV55.td
+++ b/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -88,6 +88,8 @@ def HexagonItinerariesV55 :
// Load
InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
[2, 1]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [2, 1]>,
InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<1, [SLOT0]>], [2, 1]>,
InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [2, 1]>,
@@ -96,21 +98,30 @@ def HexagonItinerariesV55 :
[1, 1, 1]>,
InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[2, 1, 1]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[1, 1, 1]>,
InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[3, 1, 1]>,
+ InstrItinData<M_tc_3x_acc_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[3, 1, 1]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
InstrItinData<M_tc_3stall_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
[3, 1, 1]>,
// Store
InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
[1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1]>,
InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<1, [SLOT0]>], [2, 1, 1]>,
InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [2, 1, 1]>,
InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
// S
InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td
index dc2ce43b0579..63784710f52b 100644
--- a/lib/Target/Hexagon/HexagonScheduleV60.td
+++ b/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -19,6 +19,8 @@ def CVI_LD : FuncUnit;
def CVI_XLSHF : FuncUnit;
def CVI_MPY01 : FuncUnit;
def CVI_ALL : FuncUnit;
+def CVI_XLMPY0 : FuncUnit;
+def CVI_SHFMPY1: FuncUnit;
// Combined functional unit data.
def HexagonComboFuncsV60 :
@@ -26,7 +28,9 @@ def HexagonComboFuncsV60 :
ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>,
ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>,
ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT,
- CVI_MPY0, CVI_MPY1, CVI_LD]>
+ CVI_MPY0, CVI_MPY1, CVI_LD]>,
+ ComboFuncData<CVI_XLMPY0 , [CVI_XLANE, CVI_MPY0]>,
+ ComboFuncData<CVI_SHFMPY1 , [CVI_SHIFT, CVI_MPY1]>
]>;
// Note: When adding additional vector scheduling classes, add the
@@ -39,6 +43,7 @@ def CVI_VX : InstrItinClass;
def CVI_VX_DV_LONG : InstrItinClass;
def CVI_VX_DV : InstrItinClass;
def CVI_VX_DV_SLOT2 : InstrItinClass;
+def CVI_VX_DV_SLOT2_LONG_EARLY : InstrItinClass;
def CVI_VP : InstrItinClass;
def CVI_VP_LONG : InstrItinClass;
def CVI_VP_VS_EARLY : InstrItinClass;
@@ -150,22 +155,28 @@ def HexagonItinerariesV60 :
// Load
InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
// M
InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_acc_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
// Store
InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>]>,
// S
InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
diff --git a/lib/Target/Hexagon/HexagonScheduleV62.td b/lib/Target/Hexagon/HexagonScheduleV62.td
new file mode 100644
index 000000000000..0758788a600b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonScheduleV62.td
@@ -0,0 +1,129 @@
+//=-HexagonScheduleV62.td - HexagonV62 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// V62 follows the same schedule as V60 with the following exceptions:
+// The following instructions are permissible on any slot on V62:
+// V4_J4_cmpeq_fp0_jump_nt
+// V4_J4_cmpeq_fp0_jump_t
+// V4_J4_cmpeq_fp1_jump_nt
+// V4_J4_cmpeq_fp1_jump_t
+// V4_J4_cmpeq_tp0_jump_nt
+// V4_J4_cmpeq_tp0_jump_t
+// V4_J4_cmpeq_tp1_jump_nt
+// V4_J4_cmpeq_tp1_jump_t
+// V4_J4_cmpeqi_fp0_jump_nt
+// V4_J4_cmpeqi_fp0_jump_t
+// V4_J4_cmpeqi_fp1_jump_nt
+// V4_J4_cmpeqi_fp1_jump_t
+// V4_J4_cmpeqi_tp0_jump_nt
+// V4_J4_cmpeqi_tp0_jump_t
+// V4_J4_cmpeqi_tp1_jump_nt
+// V4_J4_cmpeqi_tp1_jump_t
+// V4_J4_cmpeqn1_fp0_jump_nt
+// V4_J4_cmpeqn1_fp0_jump_t
+// V4_J4_cmpeqn1_fp1_jump_nt
+// V4_J4_cmpeqn1_fp1_jump_t
+// V4_J4_cmpeqn1_tp0_jump_nt
+// V4_J4_cmpeqn1_tp0_jump_t
+// V4_J4_cmpeqn1_tp1_jump_nt
+// V4_J4_cmpeqn1_tp1_jump_t
+// V4_J4_cmpgt_fp0_jump_nt
+// V4_J4_cmpgt_fp0_jump_t
+// V4_J4_cmpgt_fp1_jump_nt
+// V4_J4_cmpgt_fp1_jump_t
+// V4_J4_cmpgt_tp0_jump_nt
+// V4_J4_cmpgt_tp0_jump_t
+// V4_J4_cmpgt_tp1_jump_nt
+// V4_J4_cmpgt_tp1_jump_t
+// V4_J4_cmpgti_fp0_jump_nt
+// V4_J4_cmpgti_fp0_jump_t
+// V4_J4_cmpgti_fp1_jump_nt
+// V4_J4_cmpgti_fp1_jump_t
+// V4_J4_cmpgti_tp0_jump_nt
+// V4_J4_cmpgti_tp0_jump_t
+// V4_J4_cmpgti_tp1_jump_nt
+// V4_J4_cmpgti_tp1_jump_t
+// V4_J4_cmpgtn1_fp0_jump_nt
+// V4_J4_cmpgtn1_fp0_jump_t
+// V4_J4_cmpgtn1_fp1_jump_nt
+// V4_J4_cmpgtn1_fp1_jump_t
+// V4_J4_cmpgtn1_tp0_jump_nt
+// V4_J4_cmpgtn1_tp0_jump_t
+// V4_J4_cmpgtn1_tp1_jump_nt
+// V4_J4_cmpgtn1_tp1_jump_t
+// V4_J4_cmpgtu_fp0_jump_nt
+// V4_J4_cmpgtu_fp0_jump_t
+// V4_J4_cmpgtu_fp1_jump_nt
+// V4_J4_cmpgtu_fp1_jump_t
+// V4_J4_cmpgtu_tp0_jump_nt
+// V4_J4_cmpgtu_tp0_jump_t
+// V4_J4_cmpgtu_tp1_jump_nt
+// V4_J4_cmpgtu_tp1_jump_t
+// V4_J4_cmpgtui_fp0_jump_nt
+// V4_J4_cmpgtui_fp0_jump_t
+// V4_J4_cmpgtui_fp1_jump_nt
+// V4_J4_cmpgtui_fp1_jump_t
+// V4_J4_cmpgtui_tp0_jump_nt
+// V4_J4_cmpgtui_tp0_jump_t
+// V4_J4_cmpgtui_tp1_jump_nt
+// V4_J4_cmpgtui_tp1_jump_t
+// V4_J4_tstbit0_fp0_jump_nt
+// V4_J4_tstbit0_fp0_jump_t
+// V4_J4_tstbit0_fp1_jump_nt
+// V4_J4_tstbit0_fp1_jump_t
+// V4_J4_tstbit0_tp0_jump_nt
+// V4_J4_tstbit0_tp0_jump_t
+// V4_J4_tstbit0_tp1_jump_nt
+// V4_J4_tstbit0_tp1_jump_t
+// JMP
+// JMPEXT
+// JMPEXT_f
+// JMPEXT_fnew_nt
+// JMPEXT_fnew_t
+// JMPEXT_t
+// JMPEXT_tnew_nt
+// JMPEXT_tnew_t
+// JMPNOTEXT
+// JMPNOTEXT_f
+// JMPNOTEXT_fnew_nt
+// JMPNOTEXT_fnew_t
+// JMPNOTEXT_t
+// JMPNOTEXT_tnew_nt
+// JMPNOTEXT_tnew_t
+// JMP_f
+// JMP_fnew_nt
+// JMP_fnew_t
+// JMP_t
+// JMP_tnew_nt
+// JMP_tnew_t
+// RESTORE_DEALLOC_RET_JMP_V4
+// RESTORE_DEALLOC_RET_JMP_V4_EXT
+
+def HexagonV62ItinList : ScalarItin, HVXV62Itin {
+ list<InstrItinData> ItinList =
+ !listconcat(ScalarItin_list, HVXV62Itin_list);
+}
+
+def HexagonItinerariesV62 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL],
+ [], HexagonV62ItinList.ItinList>;
+
+def HexagonModelV62 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV62;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V62 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 10730536080e..002e87fb32ce 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -51,11 +51,12 @@ SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getTargetExternalSymbol(SpecialMemcpyName,
- TLI.getPointerTy(DAG.getDataLayout()), Flags),
- std::move(Args))
+ .setLibCallee(
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getTargetExternalSymbol(
+ SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout()), Flags),
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 2c937216d463..471e32221b29 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -131,13 +131,15 @@ namespace {
INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
"Hexagon Split Double Registers", false, false)
-void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
const USet &Part, const TargetRegisterInfo &TRI) {
dbgs() << '{';
for (auto I : Part)
dbgs() << ' ' << PrintReg(I, &TRI);
dbgs() << " }";
}
+#endif
bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
for (auto I : IRM) {
@@ -391,7 +393,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
const {
- unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
+ unsigned FixedNum = 0, LoopPhiNum = 0;
int32_t TotalP = 0;
for (unsigned DR : Part) {
@@ -428,7 +430,6 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
LoopPhiNum++;
}
// Splittable instruction.
- SplitNum++;
int32_t P = profit(UseI);
if (P == std::numeric_limits<int>::min())
return false;
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 8c23a2465dd6..033b93fc910a 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -88,6 +88,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
{ "hexagonv5", V5 },
{ "hexagonv55", V55 },
{ "hexagonv60", V60 },
+ { "hexagonv62", V62 },
};
auto foundIt = CpuTable.find(CPUString);
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index f2b9cdaad1ae..6a3e7f13be4c 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -38,9 +38,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool ModeIEEERndNear;
public:
- enum HexagonArchEnum {
- V4, V5, V55, V60
- };
+#include "HexagonDepArch.h"
HexagonArchEnum HexagonArchVersion;
/// True if the target should use Back-Skip-Back scheduling. This is the
@@ -98,6 +96,9 @@ public:
bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; }
bool hasV60TOps() const { return getHexagonArchVersion() >= V60; }
bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; }
+ bool hasV62TOps() const { return getHexagonArchVersion() >= V62; }
+ bool hasV62TOpsOnly() const { return getHexagonArchVersion() == V62; }
+
bool modeIEEERndNear() const { return ModeIEEERndNear; }
bool useHVXOps() const { return UseHVXOps; }
bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; }
diff --git a/lib/Target/Hexagon/HexagonSystemInst.td b/lib/Target/Hexagon/HexagonSystemInst.td
deleted file mode 100644
index 629a98749ee9..000000000000
--- a/lib/Target/Hexagon/HexagonSystemInst.td
+++ /dev/null
@@ -1,134 +0,0 @@
-//==- HexagonSystemInst.td - System Instructions for Hexagon -*- tablegen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Cache manipulation instructions.
-//===----------------------------------------------------------------------===//
-let mayStore = 1 in
-class ST_MISC_CACHEOP<dag outs, dag ins,
- string asmstr, list<dag> pattern = [],
- bits<3> amode, bits<3> type, bits<1> un>
- : ST0Inst<outs, ins, asmstr, pattern, "", ST_tc_ld_SLOT0> {
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
- let Inst{31-28} = 0b1010;
- let Inst{27-25} = amode;
- let Inst{24-22} = type;
- let Inst{21} = un;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let mayStore = 1 in
-class ST_MISC_CACHEOP_SYS<dag outs, dag ins,
- string asmstr, list<dag> pattern = [],
- bits<3> amode, bits<3> type, bits<1> un>
- : SYSInst<outs, ins, asmstr, pattern, ""> {
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
- let Inst{31-28} = 0b1010;
- let Inst{27-25} = amode;
- let Inst{24-22} = type;
- let Inst{21} = un;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-
-let isSolo = 1, Rs = 0, Rt = 0, Rd = 0 in {
-def Y2_syncht: ST_MISC_CACHEOP <(outs), (ins),
- "syncht" , [], 0b100, 0b001, 0b0>;
-}
-
-let Rt = 0, Rd = 0 in {
-let isSoloAin1 = 1 in {
- def Y2_dccleana: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
- "dccleana($Rs)", [], 0b000, 0b000, 0b0>;
- def Y2_dcinva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
- "dcinva($Rs)", [], 0b000, 0b000, 0b1>;
- def Y2_dccleaninva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
- "dccleaninva($Rs)", [], 0b000, 0b001, 0b0>;
- }
-}
-
-let isSoloAX = 1, hasSideEffects = 1, Rd = 0 in {
- def Y4_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, IntRegs:$Rt),
- "l2fetch($Rs, $Rt)", [], 0b011, 0b000, 0b0>;
- def Y5_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, DoubleRegs:$Rt),
- "l2fetch($Rs, $Rt)", [], 0b011, 0b010, 0b0>;
-}
-
-let hasSideEffects = 0, isSolo = 1 in
-class Y2_INVALIDATE_CACHE<string mnemonic, bit MajOp>
- : JRInst <
- (outs), (ins IntRegs:$Rs),
- #mnemonic#"($Rs)" > {
- bits<5> Rs;
-
- let IClass = 0b0101;
- let Inst{27-21} = 0b0110110;
- let Inst{20-16} = Rs;
- let Inst{13-12} = 0b00;
- let Inst{11} = MajOp;
- }
-// Instruction cache invalidate
-def Y2_icinva : Y2_INVALIDATE_CACHE<"icinva", 0b0>;
-
-// Zero an aligned 32-byte cacheline.
-let isSoloAin1 = 1 in
-def Y2_dczeroa: ST0Inst <(outs), (ins IntRegs:$Rs),
- "dczeroa($Rs)"> {
- bits<5> Rs;
- let IClass = 0b1010;
- let Inst{27-21} = 0b0000110;
- let Inst{13} = 0b0;
- let Inst{20-16} = Rs;
- }
-
-// Memory synchronization.
-let hasSideEffects = 0, isSolo = 1 in
-def Y2_isync: JRInst <(outs), (ins),
- "isync"> {
- let IClass = 0b0101;
- let Inst{27-16} = 0b011111000000;
- let Inst{13} = 0b0;
- let Inst{9-0} = 0b0000000010;
- }
-
-//===----------------------------------------------------------------------===//
-// System/User instructions.
-//===----------------------------------------------------------------------===//
-// traps and pause
-let hasSideEffects = 0, isSolo = 1 in
-class J2_MISC_TRAP_PAUSE<string mnemonic, bits<2> MajOp>
- : JRInst
- <(outs), (ins u8_0Imm:$u8),
- #mnemonic#"(#$u8)"> {
- bits<8> u8;
-
- let IClass = 0b0101;
- let Inst{27-24} = 0b0100;
- let Inst{23-22} = MajOp;
- let Inst{12-8} = u8{7-3};
- let Inst{4-2} = u8{2-0};
- }
-def J2_trap0 : J2_MISC_TRAP_PAUSE<"trap0", 0b00>;
-def J2_trap1 : J2_MISC_TRAP_PAUSE<"trap1", 0b10>;
-def J2_pause : J2_MISC_TRAP_PAUSE<"pause", 0b01>;
-
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 132d12a66d46..06fc9195fa67 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;
@@ -98,11 +99,6 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
extern "C" int HexagonTargetMachineModule;
int HexagonTargetMachineModule = 0;
-extern "C" void LLVMInitializeHexagonTarget() {
- // Register the target.
- RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
-}
-
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
}
@@ -114,6 +110,8 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
namespace llvm {
extern char &HexagonExpandCondsetsID;
void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
+ Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
FunctionPass *createHexagonBranchRelaxation();
@@ -150,6 +148,12 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
+ initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+}
+
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -172,11 +176,11 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
const HexagonSubtarget *
HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
+ AttributeList FnAttrs = F.getAttributes();
Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-cpu");
Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -196,6 +200,14 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
+void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
+ PMB.addExtension(
+ PassManagerBuilder::EP_LateLoopOptimizations,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createHexagonLoopIdiomPass());
+ });
+}
+
TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(HexagonTTIImpl(this, F));
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70835c0d4ac5..3d01929fbfb8 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -37,6 +37,7 @@ public:
static unsigned getModuleMatchQuality(const Module &M);
+ void adjustPassManager(PassManagerBuilder &PMB) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;
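For context, a minimal sketch of how the adjustPassManager() hook added above is typically consumed by a driver that builds the legacy pass pipeline. The TargetMachine and Module are assumed to be already configured, and the helper name is illustrative only, not part of this patch:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    // Assumed driver-side sketch: TM and M are a configured TargetMachine and a
    // parsed Module.  adjustPassManager() lets the target install its extension
    // callbacks before the standard pipeline is populated; for Hexagon this
    // registers the loop-idiom pass at EP_LateLoopOptimizations.
    static void runMiddleEnd(llvm::TargetMachine &TM, llvm::Module &M) {
      llvm::PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      TM.adjustPassManager(PMB);          // target adds its extensions here
      llvm::legacy::PassManager MPM;
      PMB.populateModulePassManager(MPM); // extension fires with the loop passes
      MPM.run(M);
    }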
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 7b1247d815a5..3a789a5f7e0b 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -440,7 +440,7 @@ bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI,
}
bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) {
- int NewOpcode = HII->getDotOldOp(MI.getOpcode());
+ int NewOpcode = HII->getDotOldOp(MI);
MI.setDesc(HII->get(NewOpcode));
return true;
}
@@ -720,6 +720,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
// %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
// S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
for (auto &MO : PacketMI.operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
+ return false;
if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
continue;
unsigned R = MO.getReg();
@@ -759,9 +761,12 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI,
}
static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) {
- for (auto &MO : I.operands())
+ for (auto &MO : I.operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
+ return true;
if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
return true;
+ }
return false;
}
@@ -1046,7 +1051,9 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
// XTYPE instructions. Since there is no convenient way of identifying fp
// XTYPE instructions, only allow grouping with ALU32 for now.
unsigned TJ = HII.getType(MJ);
- if (TJ != HexagonII::TypeALU32)
+ if (TJ != HexagonII::TypeALU32_2op &&
+ TJ != HexagonII::TypeALU32_3op &&
+ TJ != HexagonII::TypeALU32_ADDI)
return true;
break;
}
@@ -1171,6 +1178,36 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I,
(J.isBranch() || J.isCall() || J.isBarrier());
}
+bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
+ const MachineInstr &J) {
+ // Adding I to a packet that has J.
+
+ // Regmasks are not reflected in the scheduling dependency graph, so
+ // we need to check them manually. This code assumes that regmasks only
+ // occur on calls, and the problematic case is when we add an instruction
+ // defining a register R to a packet that has a call that clobbers R via
+ // a regmask. Those cannot be packetized together, because the call will
+ // be executed last. That's also a reason why it is ok to add a call
+ // clobbering R to a packet that defines R.
+
+ // Look for regmasks in J.
+ for (const MachineOperand &OpJ : J.operands()) {
+ if (!OpJ.isRegMask())
+ continue;
+ assert((J.isCall() || HII->isTailCall(J)) && "Regmask on a non-call");
+ for (const MachineOperand &OpI : I.operands()) {
+ if (OpI.isReg()) {
+ if (OpJ.clobbersPhysReg(OpI.getReg()))
+ return true;
+ } else if (OpI.isRegMask()) {
+ // Both are regmasks. Assume that they intersect.
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
const MachineInstr &J) {
bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
@@ -1217,6 +1254,14 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (Dependence)
return false;
+ // Regmasks are not accounted for in the scheduling graph, so we need
+ // to explicitly check for dependencies caused by them. They should only
+ // appear on calls, so it's not too pessimistic to reject all regmask
+ // dependencies.
+ Dependence = hasRegMaskDependence(I, J);
+ if (Dependence)
+ return false;
+
// V4 allows dual stores. It does not allow second store, if the first
// store is not in SLOT0. New value store, new value jump, dealloc_return
// and memop always take SLOT0. Arch spec 3.4.4.2.
@@ -1465,13 +1510,19 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// R0 = ... ; SUI
// Those cannot be packetized together, since the call will observe
// the effect of the assignment to R0.
- if (DepType == SDep::Anti && J.isCall()) {
+ if ((DepType == SDep::Anti || DepType == SDep::Output) && J.isCall()) {
// Check if I defines any volatile register. We should also check
// registers that the call may read, but these happen to be a
// subset of the volatile register set.
- for (const MCPhysReg *P = J.getDesc().ImplicitDefs; P && *P; ++P) {
- if (!I.modifiesRegister(*P, HRI))
+ for (const MachineOperand &Op : I.operands()) {
+ if (Op.isReg() && Op.isDef()) {
+ unsigned R = Op.getReg();
+ if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI))
+ continue;
+ } else if (!Op.isRegMask()) {
+ // If I has a regmask assume dependency.
continue;
+ }
FoundSequentialDependence = true;
break;
}
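The regmask handling added in HexagonVLIWPacketizer.cpp relies on how calls model clobbers: a call carries a single regmask operand, and a clear bit in that mask means the register is not preserved across the call, which is exactly what MachineOperand::clobbersPhysReg() reports. A minimal standalone sketch of that query (the helper name is hypothetical and not part of the patch):

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineOperand.h"

    // Hypothetical helper: returns true when `Call' carries a regmask that does
    // not preserve a register defined by `Def'.  clobbersPhysReg() tests the
    // mask bit for the register and reports true when the callee clobbers it.
    static bool callClobbersDef(const llvm::MachineInstr &Call,
                                const llvm::MachineInstr &Def) {
      for (const llvm::MachineOperand &MO : Call.operands()) {
        if (!MO.isRegMask())
          continue;
        for (const llvm::MachineOperand &DefOp : Def.operands())
          if (DefOp.isReg() && DefOp.isDef() &&
              MO.clobbersPhysReg(DefOp.getReg()))
            return true;
      }
      return false;
    }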
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index b28b926ec300..3f28dc5b79ce 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -7,6 +7,9 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
namespace llvm {
+class HexagonInstrInfo;
+class HexagonRegisterInfo;
+
class HexagonPacketizerList : public VLIWPacketizerList {
// Vector of instructions assigned to the packet that has just been created.
std::vector<MachineInstr*> OldPacketMIs;
@@ -109,6 +112,7 @@ protected:
void reserveResourcesForConstExt();
bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
+ bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
};
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
index e0077acc7af0..7a27a8c5e10f 100644
--- a/lib/Target/Hexagon/LLVMBuild.txt
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -36,6 +36,7 @@ required_libraries =
HexagonAsmParser
HexagonDesc
HexagonInfo
+ IPO
MC
Scalar
SelectionDAG
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index c140bd1d7ee2..337af294eb86 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -9,10 +9,10 @@
#include "Hexagon.h"
#include "HexagonFixupKinds.h"
-#include "HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCCodeEmitter.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -59,9 +59,10 @@ class HexagonAsmBackend : public MCAsmBackend {
RF.getFixups() = Fixups;
}
public:
- HexagonAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) :
- OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *),
- Extender(nullptr) {}
+ HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI,
+ StringRef CPU) :
+ OSABI(OSABI), CPU(CPU), MCII(T.createMCInstrInfo()),
+ RelaxTarget(new MCInst *), Extender(nullptr) {}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createHexagonELFObjectWriter(OS, OSABI, CPU);
@@ -88,101 +89,101 @@ public:
// This table *must* be in same the order of fixup_* kinds in
// HexagonFixupKinds.h.
//
- // namei offset bits flags
- { "fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_LO16", 0, 32, 0 },
- { "fixup_Hexagon_HI16", 0, 32, 0 },
- { "fixup_Hexagon_32", 0, 32, 0 },
- { "fixup_Hexagon_16", 0, 32, 0 },
- { "fixup_Hexagon_8", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_0", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_1", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_2", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_3", 0, 32, 0 },
- { "fixup_Hexagon_HL16", 0, 32, 0 },
- { "fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_16_X", 0, 32, 0 },
- { "fixup_Hexagon_12_X", 0, 32, 0 },
- { "fixup_Hexagon_11_X", 0, 32, 0 },
- { "fixup_Hexagon_10_X", 0, 32, 0 },
- { "fixup_Hexagon_9_X", 0, 32, 0 },
- { "fixup_Hexagon_8_X", 0, 32, 0 },
- { "fixup_Hexagon_7_X", 0, 32, 0 },
- { "fixup_Hexagon_6_X", 0, 32, 0 },
- { "fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_COPY", 0, 32, 0 },
- { "fixup_Hexagon_GLOB_DAT", 0, 32, 0 },
- { "fixup_Hexagon_JMP_SLOT", 0, 32, 0 },
- { "fixup_Hexagon_RELATIVE", 0, 32, 0 },
- { "fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_GOTREL_LO16", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_HI16", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_32", 0, 32, 0 },
- { "fixup_Hexagon_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_DTPMOD_32", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_LO16", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_HI16", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_32", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_16", 0, 32, 0 },
- { "fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_GD_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_IE_LO16", 0, 32, 0 },
- { "fixup_Hexagon_IE_HI16", 0, 32, 0 },
- { "fixup_Hexagon_IE_32", 0, 32, 0 },
- { "fixup_Hexagon_IE_16", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_LO16", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_HI16", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_32", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_16", 0, 32, 0 },
- { "fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_16_X", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_11_X", 0, 32, 0 },
- { "fixup_Hexagon_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_16_X", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_11_X", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_16_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 }
+ // namei offset bits flags
+ { "fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_32", 0, 32, 0 },
+ { "fixup_Hexagon_16", 0, 32, 0 },
+ { "fixup_Hexagon_8", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_0", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_1", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_2", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_3", 0, 32, 0 },
+ { "fixup_Hexagon_HL16", 0, 32, 0 },
+ { "fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_12_X", 0, 32, 0 },
+ { "fixup_Hexagon_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_10_X", 0, 32, 0 },
+ { "fixup_Hexagon_9_X", 0, 32, 0 },
+ { "fixup_Hexagon_8_X", 0, 32, 0 },
+ { "fixup_Hexagon_7_X", 0, 32, 0 },
+ { "fixup_Hexagon_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_COPY", 0, 32, 0 },
+ { "fixup_Hexagon_GLOB_DAT", 0, 32, 0 },
+ { "fixup_Hexagon_JMP_SLOT", 0, 32, 0 },
+ { "fixup_Hexagon_RELATIVE", 0, 32, 0 },
+ { "fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_GOTREL_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_32", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_DTPMOD_32", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_32", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_16", 0, 32, 0 },
+ { "fixup_Hexagon_GD_PLT_B22_PCREL",0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_LD_PLT_B22_PCREL",0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_GD_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_32", 0, 32, 0 },
+ { "fixup_Hexagon_IE_16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_32", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_16", 0, 32, 0 },
+ { "fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -401,7 +402,8 @@ public:
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t FixupValue, bool IsPCRel) const override {
+ uint64_t FixupValue, bool IsPCRel,
+ MCContext &Ctx) const override {
// When FixupValue is 0 the relocation is external and there
// is nothing for us to do.
@@ -524,10 +526,9 @@ public:
bool Relaxable = false;
// Branches and loop-setup insns are handled as necessary by relaxation.
if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ ||
- (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) ==
- HexagonII::TypeCOMPOUND &&
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCJ &&
MCID.isBranch()) ||
- (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV &&
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNCJ &&
MCID.isBranch()) ||
(llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR &&
HMI.getOpcode() != Hexagon::C4_addipc))
@@ -724,7 +725,8 @@ public:
Size = 0;
}
}
- bool Error = HexagonMCShuffle(*MCII, RF.getSubtargetInfo(), Inst);
+ bool Error = HexagonMCShuffle(true, *MCII, RF.getSubtargetInfo(),
+ Inst);
//assert(!Error);
(void)Error;
ReplaceInstruction(Asm.getEmitter(), RF, Inst);
@@ -739,15 +741,17 @@ public:
}
}
}
-};
-} // end anonymous namespace
+}; // class HexagonAsmBackend
-namespace llvm {
-MCAsmBackend *createHexagonAsmBackend(Target const &T,
+} // namespace
+
+// MCAsmBackend
+MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T,
MCRegisterInfo const & /*MRI*/,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- return new HexagonAsmBackend(T, OSABI, CPU);
-}
+
+ StringRef CPUString = Hexagon_MC::selectHexagonCPU(TT, CPU);
+ return new HexagonAsmBackend(T, TT, OSABI, CPUString);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 4292f6b3faa4..9c80312b790d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -17,6 +17,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
+#include "HexagonDepITypes.h"
#include "HexagonMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <stdint.h>
@@ -27,57 +28,14 @@ namespace llvm {
/// instruction info tracks.
///
namespace HexagonII {
- // *** The code below must match HexagonInstrFormat*.td *** //
-
- // Insn types.
- // *** Must match HexagonInstrFormat*.td ***
- enum Type {
- TypePSEUDO = 0,
- TypeALU32 = 1,
- TypeCR = 2,
- TypeJR = 3,
- TypeJ = 4,
- TypeLD = 5,
- TypeST = 6,
- TypeSYSTEM = 7,
- TypeXTYPE = 8,
- TypeV4LDST = 9,
- TypeNV = 10,
- TypeDUPLEX = 11,
- TypeCOMPOUND = 12,
- TypeCVI_FIRST = 13,
- TypeCVI_VA = TypeCVI_FIRST,
- TypeCVI_VA_DV = 14,
- TypeCVI_VX = 15,
- TypeCVI_VX_DV = 16,
- TypeCVI_VP = 17,
- TypeCVI_VP_VS = 18,
- TypeCVI_VS = 19,
- TypeCVI_VINLANESAT= 20,
- TypeCVI_VM_LD = 21,
- TypeCVI_VM_TMP_LD = 22,
- TypeCVI_VM_CUR_LD = 23,
- TypeCVI_VM_VP_LDU = 24,
- TypeCVI_VM_ST = 25,
- TypeCVI_VM_NEW_ST = 26,
- TypeCVI_VM_STU = 27,
- TypeCVI_HIST = 28,
- TypeCVI_LAST = TypeCVI_HIST,
- TypePREFIX = 30, // Such as extenders.
- TypeENDLOOP = 31 // Such as end of a HW loop.
- };
+ unsigned const TypeCVI_FIRST = TypeCVI_HIST;
+ unsigned const TypeCVI_LAST = TypeCVI_VX_DV;
enum SubTarget {
- HasV2SubT = 0xf,
- HasV2SubTOnly = 0x1,
- NoV2SubT = 0x0,
- HasV3SubT = 0xe,
- HasV3SubTOnly = 0x2,
- NoV3SubT = 0x1,
- HasV4SubT = 0xc,
- NoV4SubT = 0x3,
- HasV5SubT = 0x8,
- NoV5SubT = 0x7
+ HasV4SubT = 0x3f,
+ HasV5SubT = 0x3e,
+ HasV55SubT = 0x3c,
+ HasV60SubT = 0x38,
};
enum AddrMode {
@@ -107,102 +65,101 @@ namespace HexagonII {
enum {
// This 5-bit field describes the insn type.
TypePos = 0,
- TypeMask = 0x1f,
+ TypeMask = 0x3f,
// Solo instructions.
- SoloPos = 5,
+ SoloPos = 6,
SoloMask = 0x1,
// Packed only with A or X-type instructions.
- SoloAXPos = 6,
+ SoloAXPos = 7,
SoloAXMask = 0x1,
// Only A-type instruction in first slot or nothing.
- SoloAin1Pos = 7,
+ SoloAin1Pos = 8,
SoloAin1Mask = 0x1,
// Predicated instructions.
- PredicatedPos = 8,
+ PredicatedPos = 9,
PredicatedMask = 0x1,
- PredicatedFalsePos = 9,
+ PredicatedFalsePos = 10,
PredicatedFalseMask = 0x1,
- PredicatedNewPos = 10,
+ PredicatedNewPos = 11,
PredicatedNewMask = 0x1,
- PredicateLatePos = 11,
+ PredicateLatePos = 12,
PredicateLateMask = 0x1,
// New-Value consumer instructions.
- NewValuePos = 12,
+ NewValuePos = 13,
NewValueMask = 0x1,
// New-Value producer instructions.
- hasNewValuePos = 13,
+ hasNewValuePos = 14,
hasNewValueMask = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos = 14,
+ NewValueOpPos = 15,
NewValueOpMask = 0x7,
// Stores that can become new-value stores.
- mayNVStorePos = 17,
+ mayNVStorePos = 18,
mayNVStoreMask = 0x1,
// New-value store instructions.
- NVStorePos = 18,
+ NVStorePos = 19,
NVStoreMask = 0x1,
// Loads that can become current-value loads.
- mayCVLoadPos = 19,
+ mayCVLoadPos = 20,
mayCVLoadMask = 0x1,
// Current-value load instructions.
- CVLoadPos = 20,
+ CVLoadPos = 21,
CVLoadMask = 0x1,
// Extendable insns.
- ExtendablePos = 21,
+ ExtendablePos = 22,
ExtendableMask = 0x1,
// Insns must be extended.
- ExtendedPos = 22,
+ ExtendedPos = 23,
ExtendedMask = 0x1,
// Which operand may be extended.
- ExtendableOpPos = 23,
+ ExtendableOpPos = 24,
ExtendableOpMask = 0x7,
// Signed or unsigned range.
- ExtentSignedPos = 26,
+ ExtentSignedPos = 27,
ExtentSignedMask = 0x1,
// Number of bits of range before extending operand.
- ExtentBitsPos = 27,
+ ExtentBitsPos = 28,
ExtentBitsMask = 0x1f,
// Alignment power-of-two before extending operand.
- ExtentAlignPos = 32,
+ ExtentAlignPos = 33,
ExtentAlignMask = 0x3,
// Valid subtargets
- validSubTargetPos = 34,
- validSubTargetMask = 0xf,
+ validSubTargetPos = 35,
+ validSubTargetMask = 0x3f,
// Addressing mode for load/store instructions.
- AddrModePos = 40,
+ AddrModePos = 41,
AddrModeMask = 0x7,
// Access size for load/store instructions.
- MemAccessSizePos = 43,
+ MemAccessSizePos = 44,
MemAccesSizeMask = 0xf,
// Branch predicted taken.
- TakenPos = 47,
+ TakenPos = 48,
TakenMask = 0x1,
// Floating-point instructions.
- FPPos = 48,
+ FPPos = 49,
FPMask = 0x1,
// New-Value producer-2 instructions.
- hasNewValuePos2 = 50,
+ hasNewValuePos2 = 51,
hasNewValueMask2 = 0x1,
-
// Which operand consumes or produces a new value.
- NewValueOpPos2 = 51,
+ NewValueOpPos2 = 52,
NewValueOpMask2 = 0x7,
// Accumulator instructions.
- AccumulatorPos = 54,
+ AccumulatorPos = 55,
AccumulatorMask = 0x1,
// Complex XU, prevent xu competition by preferring slot3
- PrefersSlot3Pos = 55,
+ PrefersSlot3Pos = 56,
PrefersSlot3Mask = 0x1,
CofMax1Pos = 60,
@@ -217,8 +174,6 @@ namespace HexagonII {
// Hexagon Specific MachineOperand flags.
MO_NO_FLAG,
- HMOTF_ConstExtended = 1,
-
/// MO_PCREL - On a symbol operand, indicates a PC-relative relocation
/// Used for computing a global address for PIC compilations
MO_PCREL,
@@ -250,7 +205,13 @@ namespace HexagonII {
// MO_TPREL - indicates relocation for TLS
// local Executable method
- MO_TPREL
+ MO_TPREL,
+
+ // HMOTF_ConstExtended
+ // Addendum to the above; indicates a const extended op.
+ // Can be used as a mask.
+ HMOTF_ConstExtended = 0x80
+
};
// Hexagon Sub-instruction classes.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 42fcc5a6aa89..dd790fd41257 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -125,46 +125,6 @@ void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
O << -1;
}
-void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<9>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
- O << formatImm(Imm/64);
-}
-
-void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<10>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
- O << formatImm(Imm/128);
-}
-
-void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<10>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
- O << formatImm(Imm/64);
-}
-
-void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<11>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
- O << formatImm(Imm/128);
-}
-
void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
printOperand(MI, OpNo, O);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 5f421184b20a..ac8e391905e0 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -44,14 +44,6 @@ public:
raw_ostream &O) const;
void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const;
- void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
void printBranchOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const;
void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index c619c36164cf..446b3b2ce668 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -23,6 +23,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = nullptr; // .xword is only supported by V9.
CommentString = "//";
+ SupportsDebugInformation = true;
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
InlineAsmStart = "# InlineAsm Start";
@@ -30,8 +31,8 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
ZeroDirective = "\t.space\t";
AscizDirective = "\t.string\t";
- SupportsDebugInformation = true;
MinInstAlignment = 4;
UsesELFSectionDirectiveForBSS = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
+ UseLogicalShr = false;
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 07c9ad96a0d7..62b21c419f30 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -47,12 +47,40 @@ void HexagonMCChecker::init() {
if (HexagonMCInstrInfo::isBundle(MCB))
// Unfurl a bundle.
for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
- init(*I.getInst());
+ MCInst const &Inst = *I.getInst();
+ if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) {
+ init(*Inst.getOperand(0).getInst());
+ init(*Inst.getOperand(1).getInst());
+ }
+ else
+ init(Inst);
}
else
init(MCB);
}
+void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg,
+ bool &isTrue) {
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+ // Note a used predicate register.
+ PredReg = R;
+ isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
+
+ // Note use of new predicate register.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ NewPreds.insert(PredReg);
+ }
+ else
+ // Note register use. Super-registers are not tracked directly;
+ // only their component registers are.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers used indirectly.
+ Uses.insert(*SRI);
+}
+
void HexagonMCChecker::init(MCInst const& MCI) {
const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
unsigned PredReg = Hexagon::NoRegister;
@@ -60,28 +88,10 @@ void HexagonMCChecker::init(MCInst const& MCI) {
// Get used registers.
for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
- if (MCI.getOperand(i).isReg()) {
- unsigned R = MCI.getOperand(i).getReg();
-
- if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
- // Note an used predicate register.
- PredReg = R;
- isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
-
- // Note use of new predicate register.
- if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
- NewPreds.insert(PredReg);
- }
- else
- // Note register use. Super-registers are not tracked directly,
- // but their components.
- for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
- SRI.isValid();
- ++SRI)
- if (!MCSubRegIterator(*SRI, &RI).isValid())
- // Skip super-registers used indirectly.
- Uses.insert(*SRI);
- }
+ if (MCI.getOperand(i).isReg())
+ initReg(MCI, MCI.getOperand(i).getReg(), PredReg, isTrue);
+ for (unsigned i = 0; i < MCID.getNumImplicitUses(); ++i)
+ initReg(MCI, MCID.getImplicitUses()[i], PredReg, isTrue);
// Get implicit register definitions.
if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
@@ -216,9 +226,11 @@ void HexagonMCChecker::init(MCInst const& MCI) {
if (!MCSubRegIterator(N, &RI).isValid()) {
// Super-registers cannot use new values.
if (MCID.isBranch())
- NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV);
+ NewUses[N] = NewSense::Jmp(
+ llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ);
else
- NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
+ NewUses[N] = NewSense::Use(
+ PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
}
}
}
@@ -230,14 +242,18 @@ HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo cons
init();
}
-bool HexagonMCChecker::check() {
+bool HexagonMCChecker::check(bool FullCheck) {
bool chkB = checkBranches();
bool chkP = checkPredicates();
bool chkNV = checkNewValues();
bool chkR = checkRegisters();
bool chkS = checkSolo();
- bool chkSh = checkShuffle();
- bool chkSl = checkSlots();
+ bool chkSh = true;
+ if (FullCheck)
+ chkSh = checkShuffle();
+ bool chkSl = true;
+ if (FullCheck)
+ chkSl = checkSlots();
bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl;
return chk;
@@ -271,8 +287,8 @@ bool HexagonMCChecker::checkBranches() {
HexagonMCErrInfo errInfo;
if (HexagonMCInstrInfo::isBundle(MCB)) {
bool hasConditional = false;
- unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0,
- NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
+ unsigned Branches = 0,
+ Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE;
for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset;
@@ -284,12 +300,6 @@ bool HexagonMCChecker::checkBranches() {
if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() ||
HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) {
++Branches;
- if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() &&
- HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
- ++NewIndirectBranches;
- if (HexagonMCInstrInfo::isNewValue(MCII, MCI))
- ++NewValueBranches;
-
if (HexagonMCInstrInfo::isPredicated(MCII, MCI) ||
HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) {
hasConditional = true;
@@ -298,9 +308,6 @@ bool HexagonMCChecker::checkBranches() {
Unconditional = i; // Record the position of the unconditional branch.
}
}
- if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() &&
- HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad())
- ++Returns;
}
if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
@@ -504,7 +511,7 @@ bool HexagonMCChecker::checkShuffle() {
HexagonMCErrInfo errInfo;
// Branch info is lost when duplexing. The unduplexed insns must be
// checked and only branch errors matter for this case.
- HexagonMCShuffler MCS(MCII, STI, MCB);
+ HexagonMCShuffler MCS(true, MCII, STI, MCB);
if (!MCS.check()) {
if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
@@ -513,7 +520,7 @@ bool HexagonMCChecker::checkShuffle() {
return false;
}
}
- HexagonMCShuffler MCSDX(MCII, STI, MCBDX);
+ HexagonMCShuffler MCSDX(true, MCII, STI, MCBDX);
if (!MCSDX.check()) {
errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
errInfo.setShuffleError(MCSDX.getError());
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index 33e22798c954..c3b3d4c14c88 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -168,6 +168,7 @@ class HexagonMCChecker {
void init();
void init(MCInst const&);
+ void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
// Checks performed.
bool checkBranches();
@@ -177,6 +178,7 @@ class HexagonMCChecker {
bool checkSolo();
bool checkShuffle();
bool checkSlots();
+ bool checkSize();
static void compoundRegisterMap(unsigned&);
@@ -196,7 +198,7 @@ class HexagonMCChecker {
explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx,
const MCRegisterInfo& ri);
- bool check();
+ bool check(bool FullCheck = true);
/// add a new error/warning
void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 2645a17b9bd0..c0956520de73 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -35,38 +35,40 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
MCContext &aMCT)
: MCT(aMCT), MCII(aMII), Addend(new unsigned(0)),
- Extended(new bool(false)), CurrentBundle(new MCInst const *) {}
+ Extended(new bool(false)), CurrentBundle(new MCInst const *),
+ CurrentIndex(new size_t(0)) {}
-uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last,
+uint32_t HexagonMCCodeEmitter::parseBits(size_t Last,
MCInst const &MCB,
MCInst const &MCI) const {
bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI);
- if (Instruction == 0) {
+ if (*CurrentIndex == 0) {
if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
assert(!Duplex);
- assert(Instruction != Last);
+ assert(*CurrentIndex != Last);
return HexagonII::INST_PARSE_LOOP_END;
}
}
- if (Instruction == 1) {
+ if (*CurrentIndex == 1) {
if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
assert(!Duplex);
- assert(Instruction != Last);
+ assert(*CurrentIndex != Last);
return HexagonII::INST_PARSE_LOOP_END;
}
}
if (Duplex) {
- assert(Instruction == Last);
+ assert(*CurrentIndex == Last);
return HexagonII::INST_PARSE_DUPLEX;
}
- if(Instruction == Last)
+ if(*CurrentIndex == Last)
return HexagonII::INST_PARSE_PACKET_END;
return HexagonII::INST_PARSE_NOT_END;
}
-void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
+/// encodeInstruction - Emit the bundle.
+void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- MCSubtargetInfo const &STI) const {
+ const MCSubtargetInfo &STI) const {
MCInst &HMB = const_cast<MCInst &>(MI);
assert(HexagonMCInstrInfo::isBundle(HMB));
@@ -74,7 +76,7 @@ void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
*Addend = 0;
*Extended = false;
*CurrentBundle = &MI;
- size_t Instruction = 0;
+ *CurrentIndex = 0;
size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
MCInst &HMI = const_cast<MCInst &>(*I.getInst());
@@ -82,11 +84,10 @@ void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
computeAvailableFeatures(STI.getFeatureBits()));
EncodeSingleInstruction(HMI, OS, Fixups, STI,
- parseBits(Instruction, Last, HMB, HMI),
- Instruction);
+ parseBits(Last, HMB, HMI));
*Extended = HexagonMCInstrInfo::isImmext(HMI);
*Addend += HEXAGON_INSTR_SIZE;
- ++Instruction;
+ ++*CurrentIndex;
}
return;
}
@@ -107,165 +108,44 @@ static bool RegisterMatches(unsigned Consumer, unsigned Producer,
/// EncodeSingleInstruction - Emit a single
void HexagonMCCodeEmitter::EncodeSingleInstruction(
const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const {
- MCInst HMB = MI;
- assert(!HexagonMCInstrInfo::isBundle(HMB));
+ const MCSubtargetInfo &STI, uint32_t Parse) const {
+ assert(!HexagonMCInstrInfo::isBundle(MI));
uint64_t Binary;
- // Compound instructions are limited to using registers 0-7 and 16-23
- // and here we make a map 16-23 to 8-15 so they can be correctly encoded.
- static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10,
- Hexagon::R11, Hexagon::R12, Hexagon::R13,
- Hexagon::R14, Hexagon::R15};
-
// Pseudo instructions don't get encoded and shouldn't be here
// in the first place!
- assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
+ assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo() &&
"pseudo-instruction found");
DEBUG(dbgs() << "Encoding insn"
- " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+ " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'"
"\n");
- if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) {
- for (unsigned i = 0; i < HMB.getNumOperands(); ++i)
- if (HMB.getOperand(i).isReg()) {
- unsigned Reg =
- MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg());
- if ((Reg <= 23) && (Reg >= 16))
- HMB.getOperand(i).setReg(RegMap[Reg - 16]);
- }
- }
-
- if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
- // Calculate the new value distance to the associated producer
- MCOperand &MCO =
- HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB));
- unsigned SOffset = 0;
- unsigned VOffset = 0;
- unsigned Register = MCO.getReg();
- unsigned Register1;
- unsigned Register2;
- auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
- auto i = Instructions.begin() + Index - 1;
- for (;; --i) {
- assert(i != Instructions.begin() - 1 && "Couldn't find producer");
- MCInst const &Inst = *i->getInst();
- if (HexagonMCInstrInfo::isImmext(Inst))
- continue;
- ++SOffset;
- if (HexagonMCInstrInfo::isVector(MCII, Inst))
- // Vector instructions don't count scalars
- ++VOffset;
- Register1 =
- HexagonMCInstrInfo::hasNewValue(MCII, Inst)
- ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg()
- : static_cast<unsigned>(Hexagon::NoRegister);
- Register2 =
- HexagonMCInstrInfo::hasNewValue2(MCII, Inst)
- ? HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg()
- : static_cast<unsigned>(Hexagon::NoRegister);
- if (!RegisterMatches(Register, Register1, Register2))
- // This isn't the register we're looking for
- continue;
- if (!HexagonMCInstrInfo::isPredicated(MCII, Inst))
- // Producer is unpredicated
- break;
- assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) &&
- "Unpredicated consumer depending on predicated producer");
- if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) ==
- HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB))
- // Producer predicate sense matched ours
- break;
- }
- // Hexagon PRM 10.11 Construct Nt from distance
- unsigned Offset =
- HexagonMCInstrInfo::isVector(MCII, HMB) ? VOffset : SOffset;
- Offset <<= 1;
- Offset |=
- HexagonMCInstrInfo::SubregisterBit(Register, Register1, Register2);
- MCO.setReg(Offset + Hexagon::R0);
- }
-
- Binary = getBinaryCodeForInstr(HMB, Fixups, STI);
+ Binary = getBinaryCodeForInstr(MI, Fixups, STI);
// Check for unimplemented instructions. Immediate extenders
// are encoded as zero, so they need to be accounted for.
- if ((!Binary) &&
- ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) &&
- (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) &&
- (HMB.getOpcode() != A4_ext_g))) {
+ if (!Binary &&
+ MI.getOpcode() != DuplexIClass0 &&
+ MI.getOpcode() != A4_ext) {
DEBUG(dbgs() << "Unimplemented inst: "
- " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+ " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'"
"\n");
llvm_unreachable("Unimplemented Instruction");
}
Binary |= Parse;
// if we need to emit a duplexed instruction
- if (HMB.getOpcode() >= Hexagon::DuplexIClass0 &&
- HMB.getOpcode() <= Hexagon::DuplexIClassF) {
+ if (MI.getOpcode() >= Hexagon::DuplexIClass0 &&
+ MI.getOpcode() <= Hexagon::DuplexIClassF) {
assert(Parse == HexagonII::INST_PARSE_DUPLEX &&
"Emitting duplex without duplex parse bits");
- unsigned dupIClass;
- switch (HMB.getOpcode()) {
- case Hexagon::DuplexIClass0:
- dupIClass = 0;
- break;
- case Hexagon::DuplexIClass1:
- dupIClass = 1;
- break;
- case Hexagon::DuplexIClass2:
- dupIClass = 2;
- break;
- case Hexagon::DuplexIClass3:
- dupIClass = 3;
- break;
- case Hexagon::DuplexIClass4:
- dupIClass = 4;
- break;
- case Hexagon::DuplexIClass5:
- dupIClass = 5;
- break;
- case Hexagon::DuplexIClass6:
- dupIClass = 6;
- break;
- case Hexagon::DuplexIClass7:
- dupIClass = 7;
- break;
- case Hexagon::DuplexIClass8:
- dupIClass = 8;
- break;
- case Hexagon::DuplexIClass9:
- dupIClass = 9;
- break;
- case Hexagon::DuplexIClassA:
- dupIClass = 10;
- break;
- case Hexagon::DuplexIClassB:
- dupIClass = 11;
- break;
- case Hexagon::DuplexIClassC:
- dupIClass = 12;
- break;
- case Hexagon::DuplexIClassD:
- dupIClass = 13;
- break;
- case Hexagon::DuplexIClassE:
- dupIClass = 14;
- break;
- case Hexagon::DuplexIClassF:
- dupIClass = 15;
- break;
- default:
- llvm_unreachable("Unimplemented DuplexIClass");
- break;
- }
+ unsigned dupIClass = MI.getOpcode() - Hexagon::DuplexIClass0;
// 29 is the bit position.
// 0b1110 =0xE bits are masked off and down shifted by 1 bit.
// Last bit is moved to bit position 13
Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13);
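// For example, dupIClass = 0xB (0b1011) gives
// ((0xB & 0xE) << 28) | ((0xB & 0x1) << 13) = 0xA0000000 | 0x2000 = 0xA0002000,
// i.e. iclass bits 3:1 land in bits 31:29 and bit 0 lands in bit 13.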
- const MCInst *subInst0 = HMB.getOperand(0).getInst();
- const MCInst *subInst1 = HMB.getOperand(1).getInst();
+ const MCInst *subInst0 = MI.getOperand(0).getInst();
+ const MCInst *subInst1 = MI.getOperand(1).getInst();
// get subinstruction slot 0
unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI);
@@ -293,14 +173,13 @@ void raise_relocation_error(unsigned bits, unsigned kind) {
/// getFixupNoBits - Some insns are not extended and thus have no
/// bits. These cases require a more brute force method for determining
/// the correct relocation.
-namespace {
-Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
- const MCOperand &MO,
- const MCSymbolRefExpr::VariantKind kind) {
+Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits(
+ MCInstrInfo const &MCII, const MCInst &MI, const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind kind) const {
const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI);
- if (insnType == HexagonII::TypePREFIX) {
+ if (insnType == HexagonII::TypeEXTENDER) {
switch (kind) {
case MCSymbolRefExpr::VK_GOTREL:
return Hexagon::fixup_Hexagon_GOTREL_32_6_X;
@@ -319,11 +198,21 @@ Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
return Hexagon::fixup_Hexagon_IE_GOT_32_6_X;
case MCSymbolRefExpr::VK_Hexagon_PCREL:
- case MCSymbolRefExpr::VK_None:
- if (MCID.isBranch())
- return Hexagon::fixup_Hexagon_B32_PCREL_X;
- else
- return Hexagon::fixup_Hexagon_32_6_X;
+ return Hexagon::fixup_Hexagon_B32_PCREL_X;
+ case MCSymbolRefExpr::VK_None: {
+ auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) {
+ if (I->getInst() == &MI) {
+ const MCInst &NextI = *(I+1)->getInst();
+ const MCInstrDesc &D = HexagonMCInstrInfo::getDesc(MCII, NextI);
+ if (D.isBranch() || D.isCall() ||
+ HexagonMCInstrInfo::getType(MCII, NextI) == HexagonII::TypeCR)
+ return Hexagon::fixup_Hexagon_B32_PCREL_X;
+ return Hexagon::fixup_Hexagon_32_6_X;
+ }
+ }
+ raise_relocation_error(0, kind);
+ }
default:
raise_relocation_error(0, kind);
}
@@ -406,7 +295,6 @@ Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
}
llvm_unreachable("Relocation exit not taken");
}
-}
namespace llvm {
extern const MCInstrDesc HexagonInsts[];
@@ -450,7 +338,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
int64_t Value;
if (ME->evaluateAsAbsolute(Value))
return Value;
- assert(ME->getKind() == MCExpr::SymbolRef || ME->getKind() == MCExpr::Binary);
+ assert(ME->getKind() == MCExpr::SymbolRef ||
+ ME->getKind() == MCExpr::Binary);
if (ME->getKind() == MCExpr::Binary) {
MCBinaryExpr const *Binary = cast<MCBinaryExpr>(ME);
getExprOpValue(MI, MO, Binary->getLHS(), Fixups, STI);
@@ -581,7 +470,30 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
if (HexagonMCInstrInfo::s23_2_reloc(*MO.getExpr()))
FixupKind = Hexagon::fixup_Hexagon_23_REG;
else
- raise_relocation_error(bits, kind);
+ if (MCID.mayStore() || MCID.mayLoad()) {
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
+ ++ImpUses) {
+ if (*ImpUses != Hexagon::GP)
+ continue;
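+ // Select the GP-relative fixup that corresponds to the memory access
+ // size of this GP-based load/store.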
+ switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
+ case HexagonII::MemAccessSize::ByteAccess:
+ FixupKind = fixup_Hexagon_GPREL16_0;
+ break;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ FixupKind = fixup_Hexagon_GPREL16_1;
+ break;
+ case HexagonII::MemAccessSize::WordAccess:
+ FixupKind = fixup_Hexagon_GPREL16_2;
+ break;
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ FixupKind = fixup_Hexagon_GPREL16_3;
+ break;
+ default:
+ raise_relocation_error(bits, kind);
+ }
+ }
+ } else
+ raise_relocation_error(bits, kind);
break;
}
case MCSymbolRefExpr::VK_DTPREL:
@@ -795,10 +707,71 @@ unsigned
HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const {
+#ifndef NDEBUG
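+ // Debug-only sanity check: MO must be one of MI's own operands, since
+ // the code below compares operand addresses.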
+ size_t OperandNumber = ~0U;
+ for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i)
+ if (&MI.getOperand(i) == &MO) {
+ OperandNumber = i;
+ break;
+ }
+ assert((OperandNumber != ~0U) && "Operand not found");
+#endif
+
+ if (HexagonMCInstrInfo::isNewValue(MCII, MI) &&
+ &MO == &MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI))) {
+ // Calculate the new value distance to the associated producer
+ MCOperand const &MCO =
+ MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI));
+ unsigned SOffset = 0;
+ unsigned VOffset = 0;
+ unsigned Register = MCO.getReg();
+ unsigned Register1;
+ unsigned Register2;
+ auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ auto i = Instructions.begin() + *CurrentIndex - 1;
+ for (;; --i) {
+ assert(i != Instructions.begin() - 1 && "Couldn't find producer");
+ MCInst const &Inst = *i->getInst();
+ if (HexagonMCInstrInfo::isImmext(Inst))
+ continue;
+ ++SOffset;
+ if (HexagonMCInstrInfo::isVector(MCII, Inst))
+ // Vector instructions don't count scalars
+ ++VOffset;
+ Register1 =
+ HexagonMCInstrInfo::hasNewValue(MCII, Inst)
+ ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg()
+ : static_cast<unsigned>(Hexagon::NoRegister);
+ Register2 =
+ HexagonMCInstrInfo::hasNewValue2(MCII, Inst)
+ ? HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg()
+ : static_cast<unsigned>(Hexagon::NoRegister);
+ if (!RegisterMatches(Register, Register1, Register2))
+ // This isn't the register we're looking for
+ continue;
+ if (!HexagonMCInstrInfo::isPredicated(MCII, Inst))
+ // Producer is unpredicated
+ break;
+ assert(HexagonMCInstrInfo::isPredicated(MCII, MI) &&
+ "Unpredicated consumer depending on predicated producer");
+ if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) ==
+ HexagonMCInstrInfo::isPredicatedTrue(MCII, MI))
+ // Producer predicate sense matched ours
+ break;
+ }
+ // Hexagon PRM 10.11 Construct Nt from distance
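+ // For example, a scalar consumer whose producer is two (non-extender)
+ // instructions earlier, with no sub-register bit, gets Offset = (2 << 1) | 0 = 4.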
+ unsigned Offset =
+ HexagonMCInstrInfo::isVector(MCII, MI) ? VOffset : SOffset;
+ Offset <<= 1;
+ Offset |=
+ HexagonMCInstrInfo::SubregisterBit(Register, Register1, Register2);
+ return Offset;
+ }
assert(!MO.isImm());
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- if (HexagonMCInstrInfo::isSubInstruction(MI))
+ if (HexagonMCInstrInfo::isSubInstruction(MI) ||
+ llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCJ)
return HexagonMCInstrInfo::getDuplexRegisterNumbering(Reg);
switch(MI.getOpcode()){
case Hexagon::A2_tfrrcr:
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 8e0667d9ac8e..c3a4beec313f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -15,6 +15,7 @@
#ifndef HEXAGONMCCODEEMITTER_H
#define HEXAGONMCCODEEMITTER_H
+#include "MCTargetDesc/HexagonFixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -31,18 +32,22 @@ class HexagonMCCodeEmitter : public MCCodeEmitter {
std::unique_ptr<unsigned> Addend;
std::unique_ptr<bool> Extended;
std::unique_ptr<MCInst const *> CurrentBundle;
+ std::unique_ptr<size_t> CurrentIndex;
// helper routine for getMachineOpValue()
unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
+ const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind kind) const;
+
public:
HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
// Return parse bits for instruction `MCI' inside bundle `MCB'
- uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB,
- MCInst const &MCI) const;
+ uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const;
void encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -51,7 +56,7 @@ public:
void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI,
- uint32_t Parse, size_t Index) const;
+ uint32_t Parse) const;
// \brief TableGen'erated function for getting the
// binary encoding for an instruction.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 9a09a17767a6..ffa980ca6563 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -14,6 +14,7 @@
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
@@ -396,7 +397,7 @@ static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
/// is found update the contents of the bundle with the compound insn.
/// If a compound instruction is found then the bundle will have one
/// additional slot.
-void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
+void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCI) {
assert(HexagonMCInstrInfo::isBundle(MCI) &&
"Non-Bundle where Bundle expected");
@@ -405,8 +406,23 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
if (MCI.size() < 2)
return;
+ bool StartedValid = llvm::HexagonMCShuffle(false, MCII, STI, MCI);
+
+ // Work on a copy of the bundle; needed to keep the order of jump instructions.
+ MCInst CheckList(MCI);
+
// Look for compounds until none are found, only update the bundle when
// a compound is found.
- while (lookForCompound(MCII, Context, MCI))
- ;
+ while (lookForCompound(MCII, Context, CheckList)) {
+ // Keep the original bundle around in case the shuffle fails.
+ MCInst OriginalBundle(MCI);
+
+ // Need to update the bundle.
+ MCI = CheckList;
+
+ if (StartedValid && !llvm::HexagonMCShuffle(false, MCII, STI, MCI)) {
+ DEBUG(dbgs() << "Found ERROR\n");
+ MCI = OriginalBundle;
+ }
+ }
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 413f052aa4bd..e8f154a1fa53 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -262,6 +263,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
case Hexagon::EH_RETURN_JMPR:
case Hexagon::J2_jumpr:
+ case Hexagon::PS_jmpret:
// jumpr r31
// Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
DstReg = MCI.getOperand(0).getReg();
@@ -275,6 +277,12 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
case Hexagon::J2_jumprfnew:
case Hexagon::J2_jumprtnewpt:
case Hexagon::J2_jumprfnewpt:
+ case Hexagon::PS_jmprett:
+ case Hexagon::PS_jmpretf:
+ case Hexagon::PS_jmprettnew:
+ case Hexagon::PS_jmpretfnew:
+ case Hexagon::PS_jmprettnewpt:
+ case Hexagon::PS_jmpretfnewpt:
DstReg = MCI.getOperand(1).getReg();
SrcReg = MCI.getOperand(0).getReg();
// [if ([!]p0[.new])] jumpr r31
@@ -284,15 +292,10 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
}
break;
case Hexagon::L4_return_t:
-
case Hexagon::L4_return_f:
-
case Hexagon::L4_return_tnew_pnt:
-
case Hexagon::L4_return_fnew_pnt:
-
case Hexagon::L4_return_tnew_pt:
-
case Hexagon::L4_return_fnew_pt:
// [if ([!]p0[.new])] dealloc_return
SrcReg = MCI.getOperand(0).getReg();
@@ -565,7 +568,8 @@ bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
MCInst const &MIa, bool ExtendedA,
MCInst const &MIb, bool ExtendedB,
- bool bisReversable) {
+ bool bisReversable,
+ MCSubtargetInfo const &STI) {
// Slot 1 cannot be extended in duplexes PRM 10.5
if (ExtendedA)
return false;
@@ -625,11 +629,16 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
return false;
}
- // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
- // therefore, not duplexable if slot 1 is a store, and slot 0 is not.
- if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) {
- if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2))
- return false;
+ if (STI.getCPU().equals_lower("hexagonv4") ||
+ STI.getCPU().equals_lower("hexagonv5") ||
+ STI.getCPU().equals_lower("hexagonv55") ||
+ STI.getCPU().equals_lower("hexagonv60")) {
+ // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
+ // therefore, not duplexable if slot 1 is a store, and slot 0 is not.
+ if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) {
+ if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2))
+ return false;
+ }
}
return (isDuplexPairMatch(MIaG, MIbG));
@@ -703,6 +712,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Result.setOpcode(Hexagon::SA1_dec);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
break;
} // 1,2 SUBInst $Rd = add($Rs,#-1)
else if (Inst.getOperand(1).getReg() == Hexagon::R29) {
@@ -806,20 +816,27 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
break; // none SUBInst deallocframe
case Hexagon::EH_RETURN_JMPR:
case Hexagon::J2_jumpr:
+ case Hexagon::PS_jmpret:
Result.setOpcode(Hexagon::SL2_jumpr31);
break; // none SUBInst jumpr r31
case Hexagon::J2_jumprf:
+ case Hexagon::PS_jmpretf:
Result.setOpcode(Hexagon::SL2_jumpr31_f);
break; // none SUBInst if (!p0) jumpr r31
case Hexagon::J2_jumprfnew:
case Hexagon::J2_jumprfnewpt:
+ case Hexagon::PS_jmpretfnewpt:
+ case Hexagon::PS_jmpretfnew:
Result.setOpcode(Hexagon::SL2_jumpr31_fnew);
break; // none SUBInst if (!p0.new) jumpr:nt r31
case Hexagon::J2_jumprt:
+ case Hexagon::PS_jmprett:
Result.setOpcode(Hexagon::SL2_jumpr31_t);
break; // none SUBInst if (p0) jumpr r31
case Hexagon::J2_jumprtnew:
case Hexagon::J2_jumprtnewpt:
+ case Hexagon::PS_jmprettnewpt:
+ case Hexagon::PS_jmprettnew:
Result.setOpcode(Hexagon::SL2_jumpr31_tnew);
break; // none SUBInst if (p0.new) jumpr:nt r31
case Hexagon::L2_loadrb_io:
@@ -966,6 +983,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
if (Absolute && Value == -1) {
Result.setOpcode(Hexagon::SA1_setin1);
addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
break; // 2 1 SUBInst $Rd = #-1
} else {
Result.setOpcode(Hexagon::SA1_seti);
@@ -1005,6 +1023,7 @@ static bool isStoreInst(unsigned opCode) {
SmallVector<DuplexCandidate, 8>
HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
MCInst const &MCB) {
assert(isBundle(MCB));
SmallVector<DuplexCandidate, 8> duplexToTry;
@@ -1033,7 +1052,7 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
*MCB.getOperand(j).getInst(),
HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
- bisReversable)) {
+ bisReversable, STI)) {
// Get iClass.
unsigned iClass = iClassOfDuplexPair(
getDuplexCandidateGroup(*MCB.getOperand(k).getInst()),
@@ -1058,7 +1077,7 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
*MCB.getOperand(k).getInst(),
HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
- bisReversable)) {
+ bisReversable, STI)) {
// Get iClass.
unsigned iClass = iClassOfDuplexPair(
getDuplexCandidateGroup(*MCB.getOperand(j).getInst()),
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 226470cfbced..9e1ff9ca35d7 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -37,30 +37,19 @@
using namespace llvm;
-static cl::opt<unsigned>
- GPSize("gpsize", cl::NotHidden,
- cl::desc("Global Pointer Addressing Size. The default size is 8."),
- cl::Prefix, cl::init(8));
-
-void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
- const MCSubtargetInfo &STI) {
- MCInst HMI = HexagonMCInstrInfo::createBundle();
- MCInst *MCB;
-
- if (MCK.getOpcode() != Hexagon::BUNDLE) {
- HMI.addOperand(MCOperand::createInst(&MCK));
- MCB = &HMI;
- } else
- MCB = const_cast<MCInst *>(&MCK);
-
- // Examines packet and pad the packet, if needed, when an
- // end-loop is in the bundle.
- HexagonMCInstrInfo::padEndloop(getContext(), *MCB);
- HexagonMCShuffle(*MCII, STI, *MCB);
-
- assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
+static cl::opt<unsigned> GPSize
+ ("gpsize", cl::NotHidden,
+ cl::desc("Global Pointer Addressing Size. The default size is 8."),
+ cl::Prefix,
+ cl::init(8));
+
+void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
+ const MCSubtargetInfo &STI, bool) {
+ assert(MCB.getOpcode() == Hexagon::BUNDLE);
+ assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
+ assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
bool Extended = false;
- for (auto &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
MCInst *MCI = const_cast<MCInst *>(I.getInst());
if (Extended) {
if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
@@ -77,11 +66,12 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
// At this point, MCB is a bundle
// Iterate through the bundle and assign addends for the instructions
- for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
MCInst *MCI = const_cast<MCInst *>(I.getInst());
EmitSymbol(*MCI);
}
- MCObjectStreamer::EmitInstruction(*MCB, STI);
+
+ MCObjectStreamer::EmitInstruction(MCB, STI);
}
void HexagonMCELFStreamer::EmitSymbol(const MCInst &Inst) {
@@ -119,9 +109,11 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
MCSectionSubPair P = getCurrentSection();
SwitchSection(&Section);
- EmitValueToAlignment(ByteAlignment, 0, 1, 0);
- EmitLabel(Symbol);
- EmitZeros(Size);
+ if (ELFSymbol->isUndefined(false)) {
+ EmitValueToAlignment(ByteAlignment, 0, 1, 0);
+ EmitLabel(Symbol);
+ EmitZeros(Size);
+ }
// Update the maximum alignment of the section if necessary.
if (ByteAlignment > Section.getAlignment())
@@ -144,9 +136,10 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
ELFSymbol->setSize(MCConstantExpr::create(Size, getContext()));
}
-void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(
- MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment,
- unsigned AccessSize) {
+void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol,
+ uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) {
getAssembler().registerSymbol(*Symbol);
auto ELFSymbol = cast<MCSymbolELF>(Symbol);
ELFSymbol->setBinding(ELF::STB_LOCAL);
@@ -154,11 +147,12 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(
HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize);
}
-namespace llvm {
-MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_pwrite_stream &OS, MCCodeEmitter *CE) {
- return new HexagonMCELFStreamer(Context, MAB, OS, CE);
-}
+namespace llvm {
+ MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE) {
+ return new HexagonMCELFStreamer(Context, MAB, OS, CE);
+ }
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index 0ac1a68d4ef9..024dff1a2f97 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -27,7 +27,15 @@ public:
: MCELFStreamer(Context, TAB, OS, Emitter),
MCII(createHexagonMCInstrInfo()) {}
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ HexagonMCELFStreamer(MCContext &Context,
+ MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter,
+ MCAssembler *Assembler) :
+ MCELFStreamer(Context, TAB, OS, Emitter),
+ MCII (createHexagonMCInstrInfo()) {}
+
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override;
void EmitSymbol(const MCInst &Inst);
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
@@ -36,8 +44,9 @@ public:
unsigned ByteAlignment, unsigned AccessSize);
};
-MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_pwrite_stream &OS, MCCodeEmitter *CE);
+MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *CE);
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index e93906a0a396..14300edc7e1b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -11,7 +11,9 @@
#include "HexagonMCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -36,7 +38,47 @@ MCFragment *llvm::HexagonMCExpr::findAssociatedFragment() const {
return Expr->findAssociatedFragment();
}
-void HexagonMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Cannot handle nested target MCExpr");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(be->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(be->getRHS(), Asm);
+ break;
+ }
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(Expr);
+ switch (symRef.getKind()) {
+ default:
+ return;
+ case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
+ case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
+ case MCSymbolRefExpr::VK_Hexagon_IE:
+ case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ case MCSymbolRefExpr::VK_TPREL:
+ break;
+ }
+ cast<MCSymbolELF>(symRef.getSymbol()).setType(ELF::STT_TLS);
+ break;
+ }
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void HexagonMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ auto expr = getExpr();
+ fixELFSymbolsInTLSFixupsImpl(expr, Asm);
+}
MCExpr const *HexagonMCExpr::getExpr() const { return Expr; }
@@ -75,4 +117,4 @@ void HexagonMCExpr::setSignMismatch(bool Val) {
bool HexagonMCExpr::signMismatch() const {
return SignMismatch;
-} \ No newline at end of file
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index e627f026c8ad..553ffba508a1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -16,10 +16,9 @@
#include "Hexagon.h"
#include "HexagonBaseInfo.h"
#include "HexagonMCChecker.h"
-
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
@@ -59,31 +58,36 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCB,
HexagonMCChecker *Check) {
- // Examine the packet and convert pairs of instructions to compound
- // instructions when possible.
- if (!HexagonDisableCompound)
- HexagonMCInstrInfo::tryCompound(MCII, Context, MCB);
// Check the bundle for errors.
- bool CheckOk = Check ? Check->check() : true;
+ bool CheckOk = Check ? Check->check(false) : true;
if (!CheckOk)
return false;
- HexagonMCShuffle(MCII, STI, MCB);
+ // Examine the packet and convert pairs of instructions to compound
+ // instructions when possible.
+ if (!HexagonDisableCompound)
+ HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB);
+ HexagonMCShuffle(false, MCII, STI, MCB);
// Examine the packet and convert pairs of instructions to duplex
// instructions when possible.
MCInst InstBundlePreDuplex = MCInst(MCB);
if (!HexagonDisableDuplex) {
SmallVector<DuplexCandidate, 8> possibleDuplexes;
- possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB);
+ possibleDuplexes =
+ HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB);
HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
}
// Examines packet and pad the packet, if needed, when an
// end-loop is in the bundle.
- HexagonMCInstrInfo::padEndloop(Context, MCB);
+ HexagonMCInstrInfo::padEndloop(MCB, Context);
// If compounding and duplexing didn't reduce the size below
// 4 or less we have a packet that is too big.
if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
return false;
- HexagonMCShuffle(MCII, STI, MCB);
+ // Check the bundle for errors.
+ CheckOk = Check ? Check->check(true) : true;
+ if (!CheckOk)
+ return false;
+ HexagonMCShuffle(true, MCII, STI, MCB);
return true;
}
@@ -111,32 +115,14 @@ MCInst HexagonMCInstrInfo::createBundle() {
return Result;
}
-MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
- MCInst const &inst0,
- MCInst const &inst1) {
- assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf");
- MCInst *duplexInst = new (Context) MCInst;
- duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass);
-
- MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0));
- MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1));
- duplexInst->addOperand(MCOperand::createInst(SubInst0));
- duplexInst->addOperand(MCOperand::createInst(SubInst1));
- return duplexInst;
-}
-
MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
MCInst const &Inst,
MCOperand const &MO) {
assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) ||
HexagonMCInstrInfo::isExtended(MCII, Inst));
- MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
MCInst XMI;
- XMI.setOpcode((Desc.isBranch() || Desc.isCall() ||
- HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR)
- ? Hexagon::A4_ext_b
- : Hexagon::A4_ext);
+ XMI.setOpcode(Hexagon::A4_ext);
if (MO.isImm())
XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f)));
else if (MO.isExpr())
@@ -146,6 +132,20 @@ MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
return XMI;
}
+MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
+ MCInst const &inst0,
+ MCInst const &inst1) {
+ assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf");
+ MCInst *duplexInst = new (Context) MCInst;
+ duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass);
+
+ MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0));
+ MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1));
+ duplexInst->addOperand(MCOperand::createInst(SubInst0));
+ duplexInst->addOperand(MCOperand::createInst(SubInst1));
+ return duplexInst;
+}
+
MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
size_t Index) {
assert(Index <= bundleSize(MCB));
@@ -173,22 +173,9 @@ HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
HexagonII::MemAccesSizeMask));
}
-unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
-}
-
-// Return constant extended operand number.
-unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
-}
-
MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII,
MCInst const &MCI) {
- return (MCII.get(MCI.getOpcode()));
+ return MCII.get(MCI.getOpcode());
}
unsigned HexagonMCInstrInfo::getDuplexRegisterNumbering(unsigned Reg) {
@@ -276,34 +263,32 @@ unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII,
return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
}
-// Return the max value that a constant extendable operand can have
-// without being extended.
+/// Return the maximum value of an extendable operand.
int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- if (isSigned) // if value is signed
- return ~(-1U << (bits - 1));
- else
- return ~(-1U << bits);
+ assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI));
+
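+ // E.g. an 8-bit extent yields a maximum of 127 when signed and 255 when
+ // unsigned.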
+ if (S) // if value is signed
+ return (1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1)) - 1;
+ return (1 << HexagonMCInstrInfo::getExtentBits(MCII, MCI)) - 1;
}
-// Return the min value that a constant extendable operand can have
-// without being extended.
+/// Return the minimum value of an extendable operand.
int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- if (isSigned) // if value is signed
- return -1U << (bits - 1);
- else
- return 0;
+ assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI));
+
+ if (S) // if value is signed
+ return -(1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1));
+ return 0;
}
StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
@@ -319,9 +304,7 @@ unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII,
MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- unsigned const O =
- (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
+ unsigned O = HexagonMCInstrInfo::getNewValueOp(MCII, MCI);
MCOperand const &MCO = MCI.getOperand(O);
assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
@@ -349,6 +332,13 @@ HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII,
return (MCO);
}
+/// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = MCII.get(MCI.getOpcode()).TSFlags;
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -361,33 +351,55 @@ int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
return Hexagon::ArchV4;
case HexagonII::HasV5SubT:
return Hexagon::ArchV5;
+ case HexagonII::HasV55SubT:
+ return Hexagon::ArchV55;
+ case HexagonII::HasV60SubT:
+ return Hexagon::ArchV60;
}
}
-// Return the Hexagon ISA class for the insn.
-unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
-
- return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
-}
-
+/// Return the slots this instruction can execute out of
unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI,
MCInst const &MCI) {
-
const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
return ((II[SchedClass].FirstStage + HexagonStages)->getUnits());
}
-bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) {
+/// Return the slots this instruction consumes in addition to
+/// the slot(s) it can execute out of
+
+unsigned HexagonMCInstrInfo::getOtherReservedSlots(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &MCI) {
+ const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
+ int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
+ unsigned Slots = 0;
+
+ // FirstStage are slots that this instruction can execute in.
+ // FirstStage+1 are slots that are also consumed by this instruction.
+ // For example: vmemu can only execute in slot 0 but also consumes slot 1.
+ for (unsigned Stage = II[SchedClass].FirstStage + 1;
+ Stage < II[SchedClass].LastStage; ++Stage) {
+ unsigned Units = (Stage + HexagonStages)->getUnits();
+ if (Units > HexagonGetLastSlot())
+ break;
+ // fyi: getUnits() will return 0x1, 0x2, 0x4 or 0x8
+ Slots |= Units;
+ }
+
+ // if 0 is returned, then no additional slots are consumed by this inst.
+ return Slots;
+}
+
+bool HexagonMCInstrInfo::hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
if (!HexagonMCInstrInfo::isBundle(MCI))
return false;
for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
auto MI = I.getInst();
- if (isImmext(*MI))
+ if (HexagonMCInstrInfo::isDuplex(MCII, *MI))
return true;
}
@@ -398,7 +410,20 @@ bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) {
return extenderForIndex(MCB, Index) != nullptr;
}
-// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) {
+ if (!HexagonMCInstrInfo::isBundle(MCI))
+ return false;
+
+ for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
+ auto MI = I.getInst();
+ if (isImmext(*MI))
+ return true;
+ }
+
+ return false;
+}
+
+/// Return whether the insn produces a value.
bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -418,46 +443,19 @@ MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) {
return *MCB.getOperand(bundleInstructionsOffset + Index).getInst();
}
+/// Return whether the instruction is an accumulator.
+bool HexagonMCInstrInfo::isAccumulator(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
+}
+
bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) {
auto Result = Hexagon::BUNDLE == MCI.getOpcode();
assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm()));
return Result;
}
-// Return whether the insn is an actual insn.
-bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
- return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
- !HexagonMCInstrInfo::isPrefix(MCII, MCI) &&
- HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
-}
-
-bool HexagonMCInstrInfo::isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::CofMax1Pos) & HexagonII::CofMax1Mask);
-}
-
-bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND);
-}
-
-bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
- return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
- (Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
-}
-
-bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
- return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI);
-}
-
-// Return whether the instruction needs to be constant extended.
-// 1) Always return true if the instruction has 'isExtended' flag set.
-//
-// isExtendable:
-// 2) For immediate extended operands, return true only if the value is
-// out-of-range.
-// 3) For global address, always return true.
-
bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
MCInst const &MCI) {
if (HexagonMCInstrInfo::isExtended(MCII, MCI))
@@ -470,9 +468,9 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
return true;
// Branch insns are handled as necessary by relaxation.
if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) ||
- (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND &&
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCJ &&
HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) ||
- (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV &&
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ &&
HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()))
return false;
// Otherwise loop instructions and other CR insts are handled by relaxation
@@ -492,6 +490,30 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
return (MinValue > Value || Value > MaxValue);
}
+bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return !HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
+ !HexagonMCInstrInfo::isPrefix(MCII, MCI);
+}
+
+bool HexagonMCInstrInfo::isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::CofMax1Pos) & HexagonII::CofMax1Mask);
+}
+
+bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (getType(MCII, MCI) == HexagonII::TypeCJ);
+}
+
+bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
+ return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
+ (Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
+}
+
+bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI);
+}
+
bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
MCInst const &MCI) {
uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -510,9 +532,7 @@ bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) {
}
bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) {
- auto Op = MCI.getOpcode();
- return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c ||
- Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext);
+ return MCI.getOpcode() == Hexagon::A4_ext;
}
bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) {
@@ -530,20 +550,17 @@ bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) {
(Reg >= Hexagon::R16 && Reg <= Hexagon::R23));
}
-// Return whether the insn is a new-value consumer.
+/// Return whether the insn expects a newly produced value.
bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
}
-// Return whether the operand can be constant extended.
-bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII,
- MCInst const &MCI,
- unsigned short OperandNum) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
- OperandNum;
+/// Return whether the operand is extendable.
+bool HexagonMCInstrInfo::isOpExtendable(MCInstrInfo const &MCII,
+ MCInst const &MCI, unsigned short O) {
+ return (O == HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
}
bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) {
@@ -558,6 +575,10 @@ bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
+bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return HexagonII::TypeEXTENDER == HexagonMCInstrInfo::getType(MCII, MCI);
+}
+
bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -582,12 +603,22 @@ bool HexagonMCInstrInfo::isPredReg(unsigned Reg) {
return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0);
}
-bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
- return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX);
+/// Return whether the insn can be packaged only with A and X-type insns.
+bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
}
-bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+/// Return whether the insn can be packaged only with an A-type insn in slot #1.
+bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
+}
+
+/// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = MCII.get(MCI.getOpcode()).TSFlags;
return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
}
@@ -663,17 +694,6 @@ bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) {
}
}
-bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
-}
-
-bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
-}
-
bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
(getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
@@ -705,16 +725,26 @@ bool HexagonMCInstrInfo::mustExtend(MCExpr const &Expr) {
return HExpr.mustExtend();
}
void HexagonMCInstrInfo::setMustNotExtend(MCExpr const &Expr, bool Val) {
- HexagonMCExpr &HExpr =
- const_cast<HexagonMCExpr &>(cast<HexagonMCExpr>(Expr));
+ HexagonMCExpr &HExpr = const_cast<HexagonMCExpr &>(cast<HexagonMCExpr>(Expr));
HExpr.setMustNotExtend(Val);
}
bool HexagonMCInstrInfo::mustNotExtend(MCExpr const &Expr) {
HexagonMCExpr const &HExpr = cast<HexagonMCExpr>(Expr);
return HExpr.mustNotExtend();
}
+void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) {
+ HexagonMCExpr &HExpr =
+ const_cast<HexagonMCExpr &>(*llvm::cast<HexagonMCExpr>(&Expr));
+ HExpr.setS23_2_reloc(Val);
+}
+bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) {
+ HexagonMCExpr const *HExpr = llvm::dyn_cast<HexagonMCExpr>(&Expr);
+ if (!HExpr)
+ return false;
+ return HExpr->s23_2_reloc();
+}
-void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
+void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) {
MCInst Nop;
Nop.setOpcode(Hexagon::A2_nop);
assert(isBundle(MCB));
@@ -727,22 +757,8 @@ void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
MCInst const &MCI) {
- if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR)
- return false;
-
- unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
- switch (SchedClass) {
- case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
- case Hexagon::Sched::ALU64_tc_2_SLOT23:
- case Hexagon::Sched::ALU64_tc_3x_SLOT23:
- case Hexagon::Sched::M_tc_2_SLOT23:
- case Hexagon::Sched::M_tc_3x_SLOT23:
- case Hexagon::Sched::S_2op_tc_2_SLOT23:
- case Hexagon::Sched::S_3op_tc_2_SLOT23:
- case Hexagon::Sched::S_3op_tc_3x_SLOT23:
- return true;
- }
- return false;
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::PrefersSlot3Pos) & HexagonII::PrefersSlot3Mask;
}
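The rewritten prefersSlot3 above trades the scheduling-class switch for a single TSFlags bit test, the same (F >> Pos) & Mask pattern used by isSolo, isNewValue and the other predicates in this file. As a minimal standalone sketch of that pattern: the bit positions and masks below are made up for illustration, the real ones are the HexagonII::* enumerators in HexagonBaseInfo.h.

#include <cstdint>
#include <iostream>

// Illustrative flag layout; not the real HexagonII values.
enum : uint64_t {
  PrefersSlot3Pos = 7, PrefersSlot3Mask = 0x1,
  SoloPos = 5,         SoloMask = 0x1,
};

// Every predicate in HexagonMCInstrInfo reduces to this shift-and-mask test.
static bool testFlag(uint64_t TSFlags, unsigned Pos, uint64_t Mask) {
  return (TSFlags >> Pos) & Mask;
}

int main() {
  uint64_t TSFlags = uint64_t(1) << PrefersSlot3Pos; // prefers slot #3, not solo
  std::cout << testFlag(TSFlags, PrefersSlot3Pos, PrefersSlot3Mask)
            << testFlag(TSFlags, SoloPos, SoloMask) << '\n'; // prints "10"
}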
void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB,
@@ -778,15 +794,6 @@ void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
assert(isMemStoreReorderEnabled(MCI));
}
-void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) {
- HexagonMCExpr &HExpr =
- const_cast<HexagonMCExpr &>(*llvm::cast<HexagonMCExpr>(&Expr));
- HExpr.setS23_2_reloc(Val);
-}
-bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) {
- HexagonMCExpr const &HExpr = *llvm::cast<HexagonMCExpr>(&Expr);
- return HExpr.s23_2_reloc();
-}
void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
assert(isBundle(MCI));
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index d701c3ade69e..2e989adb5ccb 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -19,11 +19,8 @@
namespace llvm {
class HexagonMCChecker;
-class MCContext;
class MCInstrDesc;
class MCInstrInfo;
-class MCInst;
-class MCOperand;
class MCSubtargetInfo;
namespace HexagonII {
enum class MemAccessSize;
@@ -67,16 +64,6 @@ bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCB,
HexagonMCChecker *Checker);
-// Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
-
-MCInst createBundle();
-
-// Return the extender for instruction at Index or nullptr if none
-MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
-void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
- MCInst const &MCI);
-
// Create a duplex instruction given the two subinsts
MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
MCInst const &inst1);
@@ -86,27 +73,28 @@ MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
// Convert this instruction in to a duplex subinst
MCInst deriveSubInst(MCInst const &Inst);
+// Clamp off upper 26 bits of extendable operand for emission
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
// Return the extender for instruction at Index or nullptr if none
MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI);
// Return memory access size
HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
MCInst const &MCI);
-
-// Return number of bits in the constant extended operand.
-unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return constant extended operand number.
-unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
-
MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
// Return which duplex group this instruction belongs to
unsigned getDuplexCandidateGroup(MCInst const &MI);
// Return a list of all possible instruction duplex combinations
-SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII,
- MCInst const &MCB);
+SmallVector<DuplexCandidate, 8>
+getDuplexPossibilties(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst const &MCB);
unsigned getDuplexRegisterNumbering(unsigned Reg);
MCExpr const &getExpr(MCExpr const &Expr);
@@ -143,7 +131,6 @@ MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
MCInst const &MCI);
-
int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the Hexagon ISA class for the insn.
@@ -152,6 +139,9 @@ unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
/// Return the slots used by the insn.
unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCInst const &MCI);
+unsigned getOtherReservedSlots(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst const &MCI);
+bool hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI);
// Does the packet have an extender for the instruction at Index
bool hasExtenderForIndex(MCInst const &MCB, size_t Index);
@@ -161,19 +151,6 @@ bool hasImmExt(MCInst const &MCI);
// Return whether the instruction is a legal new-value producer.
bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return the instruction at Index
-MCInst const &instruction(MCInst const &MCB, size_t Index);
-
-// Returns whether this MCInst is a wellformed bundle
-bool isBundle(MCInst const &MCI);
-
-// Return whether the insn is an actual insn.
-bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
-bool isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI);
-bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return the duplex iclass given the two duplex classes
unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
int64_t minConstant(MCInst const &MCI, size_t Index);
@@ -189,6 +166,18 @@ template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
return isUInt<N>(minConstant(MCI, Index));
}
+// Return the instruction at Index
+MCInst const &instruction(MCInst const &MCB, size_t Index);
+bool isAccumulator(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Returns whether this MCInst is a wellformed bundle
+bool isBundle(MCInst const &MCI);
+
+// Return whether the insn is an actual insn.
+bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
+
// Return whether the instruction needs to be constant extended.
bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -229,15 +218,12 @@ bool isMemStoreReorderEnabled(MCInst const &MCI);
// Return whether the insn is a new-value consumer.
bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return true if the operand can be constant extended.
-bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
- unsigned short OperandNum);
+bool isOpExtendable(MCInstrInfo const &MCII, MCInst const &MCI, unsigned short);
// Can these two instructions be duplexed
bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa,
bool ExtendedA, MCInst const &MIb, bool ExtendedB,
- bool bisReversable);
+ bool bisReversable, MCSubtargetInfo const &STI);
// Returns whether this bundle is an endloop1
bool isOuterLoop(MCInst const &MCI);
@@ -270,12 +256,11 @@ bool mustExtend(MCExpr const &Expr);
bool mustNotExtend(MCExpr const &Expr);
// Pad the bundle with nops to satisfy endloop requirements
-void padEndloop(MCContext &Context, MCInst &MCI);
-
+void padEndloop(MCInst &MCI, MCContext &Context);
bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
// Replace the instructions inside MCB, represented by Candidate
-void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
+void replaceDuplex(MCContext &Context, MCInst &MCI, DuplexCandidate Candidate);
bool s23_2_reloc(MCExpr const &Expr);
// Marks a bundle as endloop0
@@ -295,7 +280,8 @@ unsigned SubregisterBit(unsigned Consumer, unsigned Producer,
unsigned Producer2);
// Attempt to find and replace compound pairs
-void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCI);
}
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index 7f8e7a4edb0c..529a5fd5ed82 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -33,42 +33,39 @@ void HexagonMCShuffler::init(MCInst &MCB) {
MCInst const *Extender = nullptr;
// Copy the bundle for the shuffling.
for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
- assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
- MCInst *MI = const_cast<MCInst *>(I.getInst());
+ MCInst &MI = *const_cast<MCInst *>(I.getInst());
+ DEBUG(dbgs() << "Shuffling: " << MCII.getName(MI.getOpcode()) << '\n');
+ assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo());
- if (!HexagonMCInstrInfo::isImmext(*MI)) {
- append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
- false);
+ if (!HexagonMCInstrInfo::isImmext(MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, MI));
Extender = nullptr;
} else
- Extender = MI;
+ Extender = &MI;
}
}
BundleFlags = MCB.getOperand(0).getImm();
}
-void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI,
+void HexagonMCShuffler::init(MCInst &MCB, MCInst const &AddMI,
bool bInsertAtFront) {
if (HexagonMCInstrInfo::isBundle(MCB)) {
- if (bInsertAtFront && AddMI)
- append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
- false);
+ if (bInsertAtFront)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, AddMI));
MCInst const *Extender = nullptr;
// Copy the bundle for the shuffling.
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
- MCInst *MI = const_cast<MCInst *>(I.getInst());
- if (!HexagonMCInstrInfo::isImmext(*MI)) {
- append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
- false);
+ MCInst &MI = *const_cast<MCInst *>(I.getInst());
+ if (!HexagonMCInstrInfo::isImmext(MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, MI));
Extender = nullptr;
} else
- Extender = MI;
+ Extender = &MI;
}
- if (!bInsertAtFront && AddMI)
- append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
- false);
+ if (!bInsertAtFront)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, AddMI));
}
BundleFlags = MCB.getOperand(0).getImm();
@@ -80,11 +77,11 @@ void HexagonMCShuffler::copyTo(MCInst &MCB) {
// Copy the results into the bundle.
for (HexagonShuffler::iterator I = begin(); I != end(); ++I) {
- MCInst const *MI = I->getDesc();
+ MCInst const &MI = I->getDesc();
MCInst const *Extender = I->getExtender();
if (Extender)
MCB.addOperand(MCOperand::createInst(Extender));
- MCB.addOperand(MCOperand::createInst(MI));
+ MCB.addOperand(MCOperand::createInst(&MI));
}
}
@@ -98,9 +95,9 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) {
return (!getError());
}
-bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB) {
- HexagonMCShuffler MCS(MCII, STI, MCB);
+bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst &MCB) {
+ HexagonMCShuffler MCS(true, MCII, STI, MCB);
if (DisableShuffle)
// Ignore if user chose so.
@@ -124,6 +121,18 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
if (!MCS.reshuffleTo(MCB)) {
// Unless there is any error, which should not happen at this point.
unsigned shuffleError = MCS.getError();
+
+ if (!Fatal && (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS))
+ return false;
+ if (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS) {
+ errs() << "\nFailing packet:\n";
+ for (const auto& I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ MCInst *MI = const_cast<MCInst *>(I.getInst());
+        errs() << HexagonMCInstrInfo::getName(MCII, *MI) << ' '
+               << HexagonMCInstrInfo::getDesc(MCII, *MI).getOpcode() << '\n';
+ }
+ errs() << '\n';
+ }
+
switch (shuffleError) {
default:
llvm_unreachable("unknown error");
@@ -176,7 +185,7 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val();
MCInst Attempt(MCB);
HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry);
- HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler
+ HexagonMCShuffler MCS(true, MCII, STI, Attempt); // copy packet to the shuffler
if (MCS.size() == 1) { // case of one duplex
// copy the created duplex in the shuffler to the bundle
MCS.copyTo(MCB);
@@ -191,7 +200,7 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
}
if (doneShuffling == false) {
- HexagonMCShuffler MCS(MCII, STI, MCB);
+ HexagonMCShuffler MCS(true, MCII, STI, MCB);
doneShuffling = MCS.reshuffleTo(MCB); // shuffle
shuffleError = MCS.getError();
}
@@ -202,8 +211,8 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
}
bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB, MCInst const *AddMI, int fixupCount) {
- if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI)
+ MCInst &MCB, MCInst const &AddMI, int fixupCount) {
+ if (!HexagonMCInstrInfo::isBundle(MCB))
return false;
// if fixups present, make sure we don't insert too many nops that would
@@ -211,8 +220,15 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB);
if (bundleSize >= HEXAGON_PACKET_SIZE)
return false;
+ bool bhasDuplex = HexagonMCInstrInfo::hasDuplex(MCII, MCB);
if (fixupCount >= 2) {
- return false;
+ if (bhasDuplex) {
+ if (bundleSize >= HEXAGON_PACKET_SIZE - 1) {
+ return false;
+ }
+ } else {
+ return false;
+ }
} else {
if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount)
return false;
@@ -221,7 +237,16 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
if (DisableShuffle)
return false;
- HexagonMCShuffler MCS(MCII, STI, MCB, AddMI);
+  // mgl: temporary code (the shuffler doesn't take into account the fact that
+  // a duplex takes up two slots; for example, 3 nops can be put into a packet
+  // containing a duplex, oversubscribing slots by 1).
+ unsigned maxBundleSize = (HexagonMCInstrInfo::hasImmExt(MCB))
+ ? HEXAGON_PACKET_SIZE
+ : HEXAGON_PACKET_SIZE - 1;
+ if (bhasDuplex && bundleSize >= maxBundleSize)
+ return false;
+
+ HexagonMCShuffler MCS(MCII, STI, MCB, AddMI, false);
if (!MCS.reshuffleTo(MCB)) {
unsigned shuffleError = MCS.getError();
switch (shuffleError) {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index a21cce1fc240..14bbfda4c914 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -27,16 +27,16 @@ class HexagonMCShuffler : public HexagonShuffler {
bool duplex_present;
public:
- HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB)
+ HexagonMCShuffler(bool Fatal, MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst &MCB)
: HexagonShuffler(MCII, STI) {
init(MCB);
};
HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB, const MCInst *AddMI,
- bool bInsertAtFront = false)
+ MCInst &MCB, MCInst const &AddMI,
+ bool InsertAtFront)
: HexagonShuffler(MCII, STI) {
- init(MCB, AddMI, bInsertAtFront);
+ init(MCB, AddMI, InsertAtFront);
};
// Copy reordered bundle to another.
@@ -49,14 +49,14 @@ public:
private:
void init(MCInst &MCB);
- void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false);
+ void init(MCInst &MCB, MCInst const &AddMI, bool InsertAtFront);
};
// Invocation of the shuffler.
+bool HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst &);
bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &);
-bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &, const MCInst *, int);
+ MCInst &, MCInst const &, int);
unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &,
SmallVector<DuplexCandidate, 8>);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 694cf582f8d9..bb98c2bbef6d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -66,6 +67,12 @@ static cl::opt<bool> HexagonV55ArchVariant("mv55", cl::Hidden, cl::init(false),
static cl::opt<bool> HexagonV60ArchVariant("mv60", cl::Hidden, cl::init(false),
cl::desc("Build for Hexagon V60"));
+static cl::opt<bool> HexagonV62ArchVariant("mv62", cl::Hidden, cl::init(false),
+ cl::desc("Build for Hexagon V62"));
+
+static cl::opt<bool> EnableHVX("mhvx", cl::Hidden, cl::init(false),
+ cl::desc("Enable Hexagon Vector Extension (HVX)"));
+
static StringRef DefaultArch = "hexagonv60";
static StringRef HexagonGetArchVariant() {
@@ -77,6 +84,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv55";
if (HexagonV60ArchVariant)
return "hexagonv60";
+ if (HexagonV62ArchVariant)
+ return "hexagonv62";
return "";
}
@@ -95,31 +104,16 @@ StringRef Hexagon_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
return ArchV;
}
-MCInstrInfo *llvm::createHexagonMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitHexagonMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitHexagonMCRegisterInfo(X, Hexagon::R31);
- return X;
-}
-
-static MCSubtargetInfo *
-createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- CPU = Hexagon_MC::selectHexagonCPU(TT, CPU);
- return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
-}
+unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV4FU::SLOT3; }
namespace {
class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
public:
HexagonTargetAsmStreamer(MCStreamer &S,
- formatted_raw_ostream &, bool,
- MCInstPrinter &)
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm,
+ MCInstPrinter &IP)
: HexagonTargetStreamer(S) {}
void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
@@ -156,24 +150,15 @@ public:
class HexagonTargetELFStreamer : public HexagonTargetStreamer {
public:
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI)
: HexagonTargetStreamer(S) {
- auto Bits = STI.getFeatureBits();
- unsigned Flags = 0;
- if (Bits[Hexagon::ArchV60])
- Flags = ELF::EF_HEXAGON_MACH_V60;
- else if (Bits[Hexagon::ArchV55])
- Flags = ELF::EF_HEXAGON_MACH_V55;
- else if (Bits[Hexagon::ArchV5])
- Flags = ELF::EF_HEXAGON_MACH_V5;
- else if (Bits[Hexagon::ArchV4])
- Flags = ELF::EF_HEXAGON_MACH_V4;
- getStreamer().getAssembler().setELFHeaderEFlags(Flags);
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCA.setELFHeaderEFlags(Hexagon_MC::GetELFFlags(STI));
}
- MCELFStreamer &getStreamer() {
- return static_cast<MCELFStreamer &>(Streamer);
- }
void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
@@ -196,13 +181,26 @@ public:
} // end anonymous namespace
+llvm::MCInstrInfo *llvm::createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R31);
+ return X;
+}
+
static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
// VirtualFP = (R30 + #0).
MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(nullptr, Hexagon::R30, 0);
+ MCCFIInstruction::createDefCfa(nullptr,
+ MRI.getDwarfRegNum(Hexagon::R30, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -212,31 +210,138 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI) {
+ const MCRegisterInfo &MRI)
+{
if (SyntaxVariant == 0)
- return (new HexagonInstPrinter(MAI, MII, MRI));
+ return new HexagonInstPrinter(MAI, MII, MRI);
else
return nullptr;
}
-static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S,
- formatted_raw_ostream &OS,
- MCInstPrinter *InstPrint,
- bool IsVerboseAsm) {
- return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint);
+static MCTargetStreamer *
+createMCAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS,
+ MCInstPrinter *IP, bool IsVerboseAsm) {
+ return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *IP);
}
-static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context,
- MCAsmBackend &MAB, raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll) {
- return createHexagonELFStreamer(Context, MAB, OS, Emitter);
+static MCStreamer *createMCStreamer(Triple const &T,
+ MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return createHexagonELFStreamer(T, Context, MAB, OS, Emitter);
}
static MCTargetStreamer *
-createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) {
+createHexagonObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new HexagonTargetELFStreamer(S, STI);
}
+static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) {
+ uint64_t FB = STI->getFeatureBits().to_ullong();
+ if (FB & (1ULL << F))
+ STI->ToggleFeature(F);
+}
+
+static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) {
+ uint64_t FB = STI->getFeatureBits().to_ullong();
+ return (FB & (1ULL << F)) != 0;
+}
+
+StringRef Hexagon_MC::ParseHexagonTriple(const Triple &TT, StringRef CPU) {
+ StringRef CPUName = Hexagon_MC::selectHexagonCPU(TT, CPU);
+ StringRef FS = "";
+ if (EnableHVX) {
+ if (CPUName.equals_lower("hexagonv60") ||
+ CPUName.equals_lower("hexagonv62"))
+ FS = "+hvx";
+ }
+ return FS;
+}
+
+static bool isCPUValid(std::string CPU)
+{
+ std::vector<std::string> table
+ {
+ "hexagonv4",
+ "hexagonv5",
+ "hexagonv55",
+ "hexagonv60",
+ "hexagonv62",
+ };
+
+ return std::find(table.begin(), table.end(), CPU) != table.end();
+}
+
+MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
+ StringRef CPU,
+ StringRef FS) {
+ StringRef ArchFS = (FS.size()) ? FS : Hexagon_MC::ParseHexagonTriple(TT, CPU);
+ StringRef CPUName = Hexagon_MC::selectHexagonCPU(TT, CPU);
+ if (!isCPUValid(CPUName.str())) {
+ errs() << "error: invalid CPU \"" << CPUName.str().c_str()
+ << "\" specified\n";
+ return nullptr;
+ }
+
+ MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS);
+ if (X->getFeatureBits()[Hexagon::ExtensionHVXDbl]) {
+ llvm::FeatureBitset Features = X->getFeatureBits();
+ X->setFeatureBits(Features.set(Hexagon::ExtensionHVX));
+ }
+ return X;
+}
+
+unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
+ static std::map<StringRef,unsigned> ElfFlags = {
+ {"hexagonv4", ELF::EF_HEXAGON_MACH_V4},
+ {"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
+ {"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
+ {"hexagonv60", ELF::EF_HEXAGON_MACH_V60},
+ {"hexagonv62", ELF::EF_HEXAGON_MACH_V62},
+ };
+
+ auto F = ElfFlags.find(STI.getCPU());
+ assert(F != ElfFlags.end() && "Unrecognized Architecture");
+ return F->second;
+}
+
+namespace {
+class HexagonMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ HexagonMCInstrAnalysis(MCInstrInfo const *Info) : MCInstrAnalysis(Info) {}
+
+ bool isUnconditionalBranch(MCInst const &Inst) const override {
+ //assert(!HexagonMCInstrInfo::isBundle(Inst));
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ bool isConditionalBranch(MCInst const &Inst) const override {
+ //assert(!HexagonMCInstrInfo::isBundle(Inst));
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ bool evaluateBranch(MCInst const &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const override {
+ //assert(!HexagonMCInstrInfo::isBundle(Inst));
+ if(!HexagonMCInstrInfo::isExtendable(*Info, Inst))
+ return false;
+ auto const &Extended(HexagonMCInstrInfo::getExtendableOperand(*Info, Inst));
+ assert(Extended.isExpr());
+ int64_t Value;
+ if(!Extended.getExpr()->evaluateAsAbsolute(Value))
+ return false;
+ Target = Value;
+ return true;
+ }
+};
+}
+
+static MCInstrAnalysis *createHexagonMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new HexagonMCInstrAnalysis(Info);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC asm info.
@@ -252,7 +357,7 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(getTheHexagonTarget(),
- createHexagonMCSubtargetInfo);
+ Hexagon_MC::createHexagonMCSubtargetInfo);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(getTheHexagonTarget(),
@@ -262,8 +367,18 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
TargetRegistry::RegisterMCAsmBackend(getTheHexagonTarget(),
createHexagonAsmBackend);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(getTheHexagonTarget(),
+ createHexagonMCInstrAnalysis);
+
// Register the obj streamer
- TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), createMCStreamer);
+ TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(),
+ createMCStreamer);
+
+ // Register the obj target streamer
+ TargetRegistry::RegisterObjectTargetStreamer(getTheHexagonTarget(),
+ createHexagonObjectTargetStreamer);
// Register the asm streamer
TargetRegistry::RegisterAsmTargetStreamer(getTheHexagonTarget(),
@@ -272,7 +387,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC Inst Printer
TargetRegistry::RegisterMCInstPrinter(getTheHexagonTarget(),
createHexagonMCInstPrinter);
-
- TargetRegistry::RegisterObjectTargetStreamer(
- getTheHexagonTarget(), createHexagonObjectTargetStreamer);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 6e677e9d9f86..6bb69be6142e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -41,6 +41,18 @@ extern cl::opt<bool> HexagonDisableDuplex;
extern const InstrStage HexagonStages[];
MCInstrInfo *createHexagonMCInstrInfo();
+MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT);
+
+namespace Hexagon_MC {
+ StringRef ParseHexagonTriple(const Triple &TT, StringRef CPU);
+ StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
+
+ /// Create a Hexagon MCSubtargetInfo instance. This is exposed so Asm parser,
+ /// etc. do not need to go through TargetRegistry.
+ MCSubtargetInfo *createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS);
+ unsigned GetELFFlags(const MCSubtargetInfo &STI);
+}
MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -54,13 +66,9 @@ MCAsmBackend *createHexagonAsmBackend(const Target &T,
MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, StringRef CPU);
-namespace Hexagon_MC {
-
- StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
-
-} // end namespace Hexagon_MC
+unsigned HexagonGetLastSlot();
-} // end namespace llvm
+} // End llvm namespace
// Define symbolic names for Hexagon registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 88f37d620dcf..853f76213d38 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -22,6 +22,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "HexagonShuffler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,16 +38,16 @@ class HexagonBid {
unsigned Bid;
public:
- HexagonBid() : Bid(0){};
- HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; };
+ HexagonBid() : Bid(0){}
+ HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; }
// Check if the insn priority is overflowed.
- bool isSold() const { return (Bid >= MAX); };
+ bool isSold() const { return (Bid >= MAX); }
HexagonBid &operator+=(const HexagonBid &B) {
Bid += B.Bid;
return *this;
- };
+ }
};
// Slot shuffling allocation.
@@ -56,7 +57,7 @@ class HexagonUnitAuction {
unsigned isSold : HEXAGON_PACKET_SIZE;
public:
- HexagonUnitAuction() : isSold(0){};
+  HexagonUnitAuction(unsigned cs = 0) : isSold(cs) {}
// Allocate slots.
bool bid(unsigned B) {
@@ -70,29 +71,29 @@ public:
isSold |= Scores[i].isSold() << i;
}
return true;
- ;
} else
// Error if the desired slots are already full.
return false;
- };
+ }
};
} // end anonymous namespace
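For context on the auction that check() runs further down: every insn bids on each slot it may occupy, paying MAX / popcount(slot mask) per slot, and a slot is sold once its accumulated bids reach MAX. A simplified standalone model of that over-subscription test; the slot count and the MAX value are assumptions chosen for the sketch, not taken from the header.

#include <bitset>
#include <iostream>

constexpr unsigned NumSlots = 4;
constexpr unsigned MAX = 8; // assumed bid ceiling

struct Auction {
  unsigned Score[NumSlots] = {};
  unsigned Sold = 0; // bitmask of over-subscribed slots

  bool bid(unsigned SlotMask) {
    if (!(SlotMask & ~Sold))
      return false; // every candidate slot is already full
    unsigned Price = MAX / std::bitset<32>(SlotMask).count();
    for (unsigned i = 0; i < NumSlots; ++i)
      if (SlotMask & (1u << i)) {
        Score[i] += Price;
        if (Score[i] >= MAX)
          Sold |= 1u << i;
      }
    return true;
  }
};

int main() {
  Auction A;
  bool First = A.bid(0x1);  // slot-#0-only insn: succeeds
  bool Second = A.bid(0x1); // another slot-#0-only insn: slot already sold
  std::cout << First << Second << '\n'; // prints "10"
}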
unsigned HexagonResource::setWeight(unsigned s) {
const unsigned SlotWeight = 8;
const unsigned MaskWeight = SlotWeight - 1;
- bool Key = (1 << s) & getUnits();
-
- // TODO: Improve this API so that we can prevent misuse statically.
- assert(SlotWeight * s < 32 && "Argument to setWeight too large.");
+ unsigned Units = getUnits();
+ unsigned Key = ((1u << s) & Units) != 0;
// Calculate relative weight of the insn for the given slot, weighing it the
// heavier the more restrictive the insn is and the lowest the slots that the
// insn may be executed in.
- Weight =
- (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits()))
- << countTrailingZeros(getUnits()));
- return (Weight);
+ if (Key == 0 || Units == 0 || (SlotWeight*s >= 32))
+ return Weight = 0;
+
+ unsigned Ctpop = countPopulation(Units);
+ unsigned Cttz = countTrailingZeros(Units);
+ Weight = (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz);
+ return Weight;
}
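The rewritten setWeight guards the shift amount and degenerate unit masks but keeps the original formula: an insn weighs more the fewer slots it may use (MaskWeight - popcount) and the lower its lowest legal slot (shift by the count of trailing zeros), scaled by the slot being scored. A standalone sketch with the same SlotWeight/MaskWeight constants, showing a slot-3-only insn outweighing an any-slot insn; the unit masks in main are illustrative.

#include <bitset>
#include <iostream>

static unsigned slotWeight(unsigned Units, unsigned s) {
  const unsigned SlotWeight = 8;
  const unsigned MaskWeight = SlotWeight - 1;
  // Insns that cannot use slot s, or degenerate masks, get weight 0.
  if (Units == 0 || !((1u << s) & Units) || SlotWeight * s >= 32)
    return 0;
  unsigned Ctpop = std::bitset<32>(Units).count();
  unsigned Cttz = 0;
  while (!((Units >> Cttz) & 1))
    ++Cttz;
  return (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz);
}

int main() {
  // Scored for slot #3, a slot-3-only insn (0x8) outweighs an any-slot insn (0xF).
  std::cout << slotWeight(0x8, 3) << ' ' << slotWeight(0xF, 3) << '\n';
}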
void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
@@ -104,7 +105,10 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
(*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1);
(*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
(*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
- (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VINLANESAT] =
+ (CPU == "hexagonv60" || CPU == "hexagonv61" || CPU == "hexagonv61v1") ?
+ UnitsAndLanes(CVI_SHIFT, 1) :
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
(*TUL)[HexagonII::TypeCVI_VM_LD] =
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
(*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
@@ -141,6 +145,40 @@ HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
}
}
+struct CVIUnits {
+ unsigned Units;
+ unsigned Lanes;
+};
+typedef SmallVector<struct CVIUnits, 8> HVXInstsT;
+
+static unsigned makeAllBits(unsigned startBit, unsigned Lanes) {
+ for (unsigned i = 1 ; i < Lanes ; ++i)
+ startBit = (startBit << 1) | startBit;
+ return startBit;
+}
+
+static bool checkHVXPipes(const HVXInstsT& hvxInsts, unsigned startIdx,
+                          unsigned usedUnits) {
+ if (startIdx < hvxInsts.size()) {
+ if (!hvxInsts[startIdx].Units)
+ return checkHVXPipes(hvxInsts, startIdx + 1, usedUnits);
+ for (unsigned b = 0x1 ; b <= 0x8 ; b <<= 1) {
+ if ((hvxInsts[startIdx].Units & b) == 0)
+ continue;
+ unsigned allBits = makeAllBits(b, hvxInsts[startIdx].Lanes);
+ if ((allBits & usedUnits) == 0) {
+ if (checkHVXPipes(hvxInsts, startIdx + 1, usedUnits | allBits))
+ return true;
+ }
+ }
+ return false;
+ }
+ return true;
+}
+
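makeAllBits and checkHVXPipes above implement a small backtracking search: each HVX insn carries a mask of pipes it may start on (Units) and the number of adjacent pipes it occupies (Lanes), makeAllBits widens a start bit across the lanes, and the recursion tries every legal start until a collision-free assignment is found or all choices are exhausted. A self-contained sketch of the same search; the masks used in main are illustrative, not taken from the scheduler tables.

#include <iostream>
#include <vector>

struct CVIUnits { unsigned Units, Lanes; };

// Spread a start bit over Lanes adjacent pipes, e.g. (0x1, 2) -> 0x3.
static unsigned makeAllBits(unsigned StartBit, unsigned Lanes) {
  for (unsigned i = 1; i < Lanes; ++i)
    StartBit = (StartBit << 1) | StartBit;
  return StartBit;
}

static bool checkHVXPipes(const std::vector<CVIUnits> &Insts, unsigned Idx,
                          unsigned UsedUnits) {
  if (Idx == Insts.size())
    return true;
  if (!Insts[Idx].Units) // not an HVX insn, or one that uses no pipes
    return checkHVXPipes(Insts, Idx + 1, UsedUnits);
  for (unsigned B = 0x1; B <= 0x8; B <<= 1) {
    if (!(Insts[Idx].Units & B))
      continue;
    unsigned AllBits = makeAllBits(B, Insts[Idx].Lanes);
    if ((AllBits & UsedUnits) == 0 &&
        checkHVXPipes(Insts, Idx + 1, UsedUnits | AllBits))
      return true;
  }
  return false;
}

int main() {
  std::vector<CVIUnits> Two = {{0xF, 2}, {0xF, 2}};             // two pipe pairs fit
  std::vector<CVIUnits> Three = {{0xF, 2}, {0xF, 2}, {0xF, 2}}; // needs 6 of 4 pipes
  std::cout << checkHVXPipes(Two, 0, 0) << checkHVXPipes(Three, 0, 0) << '\n'; // "10"
}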
HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI)
: MCII(MCII), STI(STI) {
@@ -154,21 +192,82 @@ void HexagonShuffler::reset() {
Error = SHUFFLE_SUCCESS;
}
-void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
- unsigned S, bool X) {
- HexagonInstr PI(&TUL, MCII, ID, Extender, S, X);
+void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender,
+ unsigned S) {
+ HexagonInstr PI(&TUL, MCII, &ID, Extender, S);
Packet.push_back(PI);
}
+static struct {
+ unsigned first;
+ unsigned second;
+} jumpSlots[] = { {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1} };
+#define MAX_JUMP_SLOTS (sizeof(jumpSlots)/sizeof(jumpSlots[0]))
+
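The jumpSlots table enumerates the (first, second) slot assignments that keep two branches in program order: the earlier branch always gets the strictly higher slot. check() below walks this table, pins the two branches to the first compatible pair, re-runs the slot auction, and restores the saved packet if that fails. A sketch of just the pair selection; the slot masks in main are illustrative.

#include <iostream>

struct SlotPair { unsigned First, Second; };
static const SlotPair JumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};

// Pick the first pair compatible with each branch's legal-slot mask.
static bool pickJumpSlots(unsigned UnitsA, unsigned UnitsB, unsigned &SlotA,
                          unsigned &SlotB) {
  for (const SlotPair &P : JumpSlots)
    if ((P.First & UnitsA) && (P.Second & UnitsB)) {
      SlotA = P.First;
      SlotB = P.Second;
      return true;
    }
  return false;
}

int main() {
  unsigned A, B;
  // A branch legal in slots 2-3 followed by one restricted to slot 2.
  if (pickJumpSlots(0xC, 0x4, A, B))
    std::cout << A << ' ' << B << '\n'; // prints "8 4"
}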
+namespace {
+bool isDuplexAGroup(unsigned Opcode) {
+ switch (Opcode) {
+ case Hexagon::SA1_addi:
+ case Hexagon::SA1_addrx:
+ case Hexagon::SA1_addsp:
+ case Hexagon::SA1_and1:
+ case Hexagon::SA1_clrf:
+ case Hexagon::SA1_clrfnew:
+ case Hexagon::SA1_clrt:
+ case Hexagon::SA1_clrtnew:
+ case Hexagon::SA1_cmpeqi:
+ case Hexagon::SA1_combine0i:
+ case Hexagon::SA1_combine1i:
+ case Hexagon::SA1_combine2i:
+ case Hexagon::SA1_combine3i:
+ case Hexagon::SA1_combinerz:
+ case Hexagon::SA1_combinezr:
+ case Hexagon::SA1_dec:
+ case Hexagon::SA1_inc:
+ case Hexagon::SA1_seti:
+ case Hexagon::SA1_setin1:
+ case Hexagon::SA1_sxtb:
+ case Hexagon::SA1_sxth:
+ case Hexagon::SA1_tfr:
+ case Hexagon::SA1_zxtb:
+ case Hexagon::SA1_zxth:
+ return true;
+ break;
+ default:
+ return false;
+ }
+}
+
+unsigned countNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) {
+ unsigned Result = 0;
+ unsigned Type = HexagonMCInstrInfo::getType(MCII, ID);
+ if (Type == HexagonII::TypeDUPLEX) {
+ unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode();
+ unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode();
+ Result += !isDuplexAGroup(subInst0Opcode);
+ Result += !isDuplexAGroup(subInst1Opcode);
+ } else
+ Result += Type != HexagonII::TypeALU32_2op &&
+ Type != HexagonII::TypeALU32_3op &&
+ Type != HexagonII::TypeALU32_ADDI &&
+ Type != HexagonII::TypeS_2op &&
+ Type != HexagonII::TypeS_3op &&
+ Type != HexagonII::TypeALU64 &&
+ (Type != HexagonII::TypeM ||
+ HexagonMCInstrInfo::isFloat(MCII, ID));
+ return Result;
+}
+}
+
/// Check that the packet is legal and enforce relative insn order.
bool HexagonShuffler::check() {
// Descriptive slot masks.
const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
- slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4,
+ slotThree = 0x8, //slotFirstJump = 0x8,
slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
// Highest slots for branches and stores used to keep their original order.
- unsigned slotJump = slotFirstJump;
+ //unsigned slotJump = slotFirstJump;
unsigned slotLoadStore = slotFirstLoadStore;
// Number of branches, solo branches, indirect branches.
unsigned jumps = 0, jump1 = 0;
@@ -188,36 +287,41 @@ bool HexagonShuffler::check() {
unsigned onlyNo1 = 0;
unsigned xtypeFloat = 0;
unsigned pSlot3Cnt = 0;
+ unsigned memops = 0;
+ unsigned deallocs = 0;
iterator slot3ISJ = end();
+ std::vector<iterator> foundBranches;
+ unsigned reservedSlots = 0;
// Collect information from the insns in the packet.
for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const *ID = ISJ->getDesc();
-
- if (HexagonMCInstrInfo::isSolo(MCII, *ID))
- solo += !ISJ->isSoloException();
- else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID))
- onlyAX += !ISJ->isSoloException();
- else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID))
- onlyAin1 += !ISJ->isSoloException();
- if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 &&
- HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE)
- ++neitherAnorX;
- if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) {
+ MCInst const &ID = ISJ->getDesc();
+
+ if (HexagonMCInstrInfo::isSolo(MCII, ID))
+ solo++;
+ else if (HexagonMCInstrInfo::isSoloAX(MCII, ID))
+ onlyAX++;
+ else if (HexagonMCInstrInfo::isSoloAin1(MCII, ID))
+ onlyAin1++;
+ neitherAnorX += countNeitherAnorX(MCII, ID);
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) {
++pSlot3Cnt;
slot3ISJ = ISJ;
}
- if (HexagonMCInstrInfo::isCofMax1(MCII, *ID))
+ reservedSlots |= HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, ID);
+ if (HexagonMCInstrInfo::isCofMax1(MCII, ID))
++jump1;
- switch (HexagonMCInstrInfo::getType(MCII, *ID)) {
- case HexagonII::TypeXTYPE:
- if (HexagonMCInstrInfo::isFloat(MCII, *ID))
+ switch (HexagonMCInstrInfo::getType(MCII, ID)) {
+ case HexagonII::TypeS_2op:
+ case HexagonII::TypeS_3op:
+ case HexagonII::TypeALU64:
+ if (HexagonMCInstrInfo::isFloat(MCII, ID))
++xtypeFloat;
break;
- case HexagonII::TypeJR:
case HexagonII::TypeJ:
++jumps;
+ foundBranches.push_back(ISJ);
break;
case HexagonII::TypeCVI_VM_VP_LDU:
++onlyNo1;
@@ -228,10 +332,14 @@ bool HexagonShuffler::check() {
case HexagonII::TypeLD:
++loads;
++memory;
- if (ISJ->Core.getUnits() == slotSingleLoad)
+ if (ISJ->Core.getUnits() == slotSingleLoad ||
+ HexagonMCInstrInfo::getType(MCII, ID) ==
+ HexagonII::TypeCVI_VM_VP_LDU)
++load0;
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
- ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) {
+ ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+ foundBranches.push_back(ISJ);
+ }
break;
case HexagonII::TypeCVI_VM_STU:
++onlyNo1;
@@ -241,27 +349,66 @@ bool HexagonShuffler::check() {
case HexagonII::TypeST:
++stores;
++memory;
- if (ISJ->Core.getUnits() == slotSingleStore)
+ if (ISJ->Core.getUnits() == slotSingleStore ||
+ HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_STU)
++store0;
break;
case HexagonII::TypeV4LDST:
++loads;
++stores;
++store1;
+ ++memops;
++memory;
break;
- case HexagonII::TypeNV:
+ case HexagonII::TypeNCJ:
++memory; // NV insns are memory-like.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch())
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) {
++jumps, ++jump1;
+ foundBranches.push_back(ISJ);
+ }
+ break;
+ case HexagonII::TypeV2LDST:
+ if(HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
+ ++loads;
+ ++memory;
+ if (ISJ->Core.getUnits() == slotSingleLoad ||
+ HexagonMCInstrInfo::getType(MCII,ID) ==
+ HexagonII::TypeCVI_VM_VP_LDU)
+ ++load0;
+ }
+ else {
+ assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore());
+ ++memory;
+ ++stores;
+ }
break;
case HexagonII::TypeCR:
// Legacy conditional branch predicated on a register.
- case HexagonII::TypeSYSTEM:
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad())
- ++loads;
+ case HexagonII::TypeCJ:
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) {
+ ++jumps;
+ foundBranches.push_back(ISJ);
+ }
+ break;
+ case HexagonII::TypeDUPLEX: {
+ ++duplex;
+ MCInst const &Inst0 = *ID.getOperand(0).getInst();
+ MCInst const &Inst1 = *ID.getOperand(1).getInst();
+ if (HexagonMCInstrInfo::isCofMax1(MCII, Inst0))
+ ++jump1;
+ if (HexagonMCInstrInfo::isCofMax1(MCII, Inst1))
+ ++jump1;
+ if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isBranch()) {
+ ++jumps;
+ foundBranches.push_back(ISJ);
+ }
+ if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isBranch()) {
+ ++jumps;
+ foundBranches.push_back(ISJ);
+ }
break;
}
+ }
}
// Check if the packet is legal.
@@ -277,12 +424,20 @@ bool HexagonShuffler::check() {
Error = SHUFFLE_ERROR_BRANCHES;
return false;
}
+ if (memops && stores > 1) {
+ Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT;
+ return false;
+ }
+ if (deallocs && stores) {
+ Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT;
+ return false;
+ }
// Modify packet accordingly.
// TODO: need to reserve slots #0 and #1 for duplex insns.
bool bOnlySlot3 = false;
for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const *ID = ISJ->getDesc();
+ MCInst const &ID = ISJ->getDesc();
if (!ISJ->Core.getUnits()) {
// Error if insn may not be executed in any slot.
@@ -291,40 +446,26 @@ bool HexagonShuffler::check() {
}
// Exclude from slot #1 any insn but A2_nop.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop)
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).getOpcode() != Hexagon::A2_nop)
if (onlyNo1)
ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
// Exclude from slot #1 any insn but A-type.
- if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32)
+ if (HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_2op &&
+ HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_3op &&
+ HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_ADDI)
if (onlyAin1)
ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
- // Branches must keep the original order.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() ||
- HexagonMCInstrInfo::getDesc(MCII, *ID).isCall())
- if (jumps > 1) {
- if (slotJump < slotLastJump) {
- // Error if indirect branch with another branch or
- // no more slots available for branches.
- Error = SHUFFLE_ERROR_BRANCHES;
- return false;
- }
- // Pin the branch to the highest slot available to it.
- ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump);
- // Update next highest slot available to branches.
- slotJump >>= 1;
- }
-
// A single load must use slot #0.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) {
- if (loads == 1 && loads == memory)
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
+ if (loads == 1 && loads == memory && memops == 0)
// Pin the load to slot #0.
ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
}
// A single store must use slot #0.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) {
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()) {
if (!store0) {
if (stores == 1)
ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
@@ -347,7 +488,7 @@ bool HexagonShuffler::check() {
}
}
- // flag if an instruction can only be executed in slot 3
+    // flag if an instruction is required to be in slot 3
if (ISJ->Core.getUnits() == slotThree)
bOnlySlot3 = true;
@@ -358,14 +499,61 @@ bool HexagonShuffler::check() {
}
}
+ // preserve branch order
bool validateSlots = true;
- if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) {
+ if (jumps > 1) {
+ if (foundBranches.size() > 2) {
+ Error = SHUFFLE_ERROR_BRANCHES;
+ return false;
+ }
+
+ // try all possible choices
+ for (unsigned int i = 0 ; i < MAX_JUMP_SLOTS ; ++i) {
+ // validate first jump with this slot rule
+ if (!(jumpSlots[i].first & foundBranches[0]->Core.getUnits()))
+ continue;
+
+ // validate second jump with this slot rule
+ if (!(jumpSlots[i].second & foundBranches[1]->Core.getUnits()))
+ continue;
+
+ // both valid for this configuration, set new slot rules
+ PacketSave = Packet;
+ foundBranches[0]->Core.setUnits(jumpSlots[i].first);
+ foundBranches[1]->Core.setUnits(jumpSlots[i].second);
+
+ HexagonUnitAuction AuctionCore(reservedSlots);
+ std::sort(begin(), end(), HexagonInstr::lessCore);
+
+      // see if the packet still shuffles with the branches pinned to these slots
+ bool bFail = false;
+ for (iterator I = begin(); I != end() && bFail != true; ++I)
+ if (!AuctionCore.bid(I->Core.getUnits()))
+ bFail = true;
+
+ // if yes, great, if not then restore original slot mask
+ if (!bFail) {
+ validateSlots = false; // all good, no need to re-do auction
+ break;
+ }
+ else
+ // restore original values
+ Packet = PacketSave;
+ }
+ if (validateSlots == true) {
+ Error = SHUFFLE_ERROR_NOSLOTS;
+ return false;
+ }
+ }
+
+ if (jumps <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) {
+ validateSlots = true;
// save off slot mask of instruction marked with A_PREFER_SLOT3
// and then pin it to slot #3
unsigned saveUnits = slot3ISJ->Core.getUnits();
slot3ISJ->Core.setUnits(saveUnits & slotThree);
- HexagonUnitAuction AuctionCore;
+ HexagonUnitAuction AuctionCore(reservedSlots);
std::sort(begin(), end(), HexagonInstr::lessCore);
// see if things ok with that instruction being pinned to slot #3
@@ -379,16 +567,16 @@ bool HexagonShuffler::check() {
validateSlots = false; // all good, no need to re-do auction
else
for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const *ID = ISJ->getDesc();
- if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID))
+ MCInst const &ID = ISJ->getDesc();
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, ID))
ISJ->Core.setUnits(saveUnits);
}
}
- // Check if any slot, core, is over-subscribed.
+ // Check if any slot, core or CVI, is over-subscribed.
// Verify the core slot subscriptions.
if (validateSlots) {
- HexagonUnitAuction AuctionCore;
+ HexagonUnitAuction AuctionCore(reservedSlots);
std::sort(begin(), end(), HexagonInstr::lessCore);
@@ -399,17 +587,27 @@ bool HexagonShuffler::check() {
}
}
// Verify the CVI slot subscriptions.
- {
- HexagonUnitAuction AuctionCVI;
-
- std::sort(begin(), end(), HexagonInstr::lessCVI);
-
- for (iterator I = begin(); I != end(); ++I)
- for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
- if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
- Error = SHUFFLE_ERROR_SLOTS;
- return false;
- }
+ std::sort(begin(), end(), HexagonInstr::lessCVI);
+ // create vector of hvx instructions to check
+ HVXInstsT hvxInsts;
+ hvxInsts.clear();
+ for (iterator I = begin(); I != end(); ++I) {
+ struct CVIUnits inst;
+ inst.Units = I->CVI.getUnits();
+ inst.Lanes = I->CVI.getLanes();
+ if (inst.Units == 0)
+      continue; // not an hvx inst or an hvx inst that doesn't use any pipes
+ hvxInsts.push_back(inst);
+ }
+ // if there are any hvx instructions in this packet, check pipe usage
+ if (hvxInsts.size() > 0) {
+ unsigned startIdx, usedUnits;
+ startIdx = usedUnits = 0x0;
+ if (checkHVXPipes(hvxInsts, startIdx, usedUnits) == false) {
+ // too many pipes used to be valid
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
}
Error = SHUFFLE_SUCCESS;
@@ -452,10 +650,12 @@ bool HexagonShuffler::shuffle() {
}
for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
- DEBUG(dbgs().write_hex(ISJ->Core.getUnits());
- dbgs() << ':'
- << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc())
- .getOpcode();
+ DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) {
+ dbgs() << '/';
+ dbgs().write_hex(ISJ->CVI.getUnits()) << '|';
+ dbgs() << ISJ->CVI.getLanes();
+ } dbgs() << ':'
+ << HexagonMCInstrInfo::getDesc(MCII, ISJ->getDesc()).getOpcode();
dbgs() << '\n');
DEBUG(dbgs() << '\n');
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index a093f8545132..36e8fa19d467 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -35,7 +35,8 @@ public:
HexagonResource(unsigned s) { setUnits(s); };
void setUnits(unsigned s) {
- Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
+ Slots = s & ((1u << HEXAGON_PACKET_SIZE) - 1);
+ setWeight(s);
};
unsigned setWeight(unsigned s);
@@ -44,7 +45,8 @@ public:
// Check if the resources are in ascending slot order.
static bool lessUnits(const HexagonResource &A, const HexagonResource &B) {
- return (countPopulation(A.getUnits()) < countPopulation(B.getUnits()));
+ return (countPopulation(A.getUnits()) <
+ countPopulation(B.getUnits()));
};
// Check if the resources are in ascending weight order.
static bool lessWeight(const HexagonResource &A, const HexagonResource &B) {
@@ -86,10 +88,10 @@ public:
unsigned s, MCInst const *id);
static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU);
- bool isValid() const { return (Valid); };
- unsigned getLanes() const { return (Lanes); };
- bool mayLoad() const { return (Load); };
- bool mayStore() const { return (Store); };
+ bool isValid() const { return Valid; };
+ unsigned getLanes() const { return Lanes; };
+ bool mayLoad() const { return Load; };
+ bool mayStore() const { return Store; };
};
// Handle to an insn used by the shuffling algorithm.
@@ -100,21 +102,17 @@ class HexagonInstr {
MCInst const *Extender;
HexagonResource Core;
HexagonCVIResource CVI;
- bool SoloException;
public:
HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T,
MCInstrInfo const &MCII, MCInst const *id,
- MCInst const *Extender, unsigned s, bool x = false)
- : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id),
- SoloException(x) {};
+ MCInst const *Extender, unsigned s)
+ : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}
- MCInst const *getDesc() const { return (ID); };
+ MCInst const &getDesc() const { return *ID; };
MCInst const *getExtender() const { return Extender; }
- unsigned isSoloException() const { return (SoloException); };
-
// Check if the handles are in ascending order for shuffling purposes.
bool operator<(const HexagonInstr &B) const {
return (HexagonResource::lessWeight(B.Core, Core));
@@ -136,6 +134,7 @@ class HexagonShuffler {
// Insn handles in a bundle.
HexagonPacket Packet;
+ HexagonPacket PacketSave;
// Shuffling error code.
unsigned Error;
@@ -178,8 +177,7 @@ public:
iterator end() { return (Packet.end()); };
// Add insn handle to the bundle .
- void append(MCInst const *ID, MCInst const *Extender, unsigned S,
- bool X = false);
+ void append(MCInst const &ID, MCInst const *Extender, unsigned S);
// Return the error code for the last check or shuffling of the bundle.
void setError(unsigned Err) { Error = Err; };
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
index 392871628d98..57ce9fabc5e3 100644
--- a/lib/Target/Hexagon/RDFCopy.cpp
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -11,6 +11,7 @@
#include "RDFCopy.h"
#include "RDFGraph.h"
+#include "RDFLiveness.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -53,47 +54,12 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
CopyMap.insert(std::make_pair(SA.Id, EM));
Copies.push_back(SA.Id);
-
- for (auto I : EM) {
- auto FS = DefM.find(I.second.Reg);
- if (FS == DefM.end() || FS->second.empty())
- continue; // Undefined source
- RDefMap[I.second][SA.Id] = FS->second.top()->Id;
- // Insert DstR into the map.
- RDefMap[I.first];
- }
-}
-
-
-void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
- RegisterSet RRs;
- for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
- RRs.insert(RA.Addr->getRegRef(DFG));
- bool Common = false;
- for (auto &R : RDefMap) {
- if (!RRs.count(R.first))
- continue;
- Common = true;
- break;
- }
- if (!Common)
- return;
-
- for (auto &R : RDefMap) {
- if (!RRs.count(R.first))
- continue;
- auto F = DefM.find(R.first.Reg);
- if (F == DefM.end() || F->second.empty())
- continue;
- R.second[IA.Id] = F->second.top()->Id;
- }
}
bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
bool Changed = false;
auto BA = DFG.getFunc().Addr->findBlock(B, DFG);
- DFG.markBlock(BA.Id, DefM);
for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
@@ -102,20 +68,30 @@ bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
if (interpretAsCopy(SA.Addr->getCode(), EM))
recordCopy(SA, EM);
}
-
- updateMap(IA);
- DFG.pushDefs(IA, DefM);
}
MachineDomTreeNode *N = MDT.getNode(B);
for (auto I : *N)
Changed |= scanBlock(I->getBlock());
- DFG.releaseBlock(BA.Id, DefM);
return Changed;
}
+NodeId CopyPropagation::getLocalReachingDef(RegisterRef RefRR,
+ NodeAddr<InstrNode*> IA) {
+ NodeAddr<RefNode*> RA = L.getNearestAliasedRef(RefRR, IA);
+ if (RA.Id != 0) {
+ if (RA.Addr->getKind() == NodeAttrs::Def)
+ return RA.Id;
+ assert(RA.Addr->getKind() == NodeAttrs::Use);
+ if (NodeId RD = RA.Addr->getReachingDef())
+ return RD;
+ }
+ return 0;
+}
+
+
bool CopyPropagation::run() {
scanBlock(&DFG.getMF().front());
@@ -129,14 +105,6 @@ bool CopyPropagation::run() {
<< Print<RegisterRef>(J.second, DFG);
dbgs() << " }\n";
}
- dbgs() << "\nRDef map:\n";
- for (auto R : RDefMap) {
- dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {";
- for (auto &M : R.second)
- dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':'
- << Print<NodeId>(M.second, DFG);
- dbgs() << " }\n";
- }
}
bool Changed = false;
@@ -176,8 +144,7 @@ bool CopyPropagation::run() {
if (DR == SR)
continue;
- auto &RDefSR = RDefMap[SR];
- NodeId RDefSR_SA = RDefSR[SA.Id];
+ NodeId AtCopy = getLocalReachingDef(SR, SA);
for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) {
auto UA = DFG.addr<UseNode*>(N);
@@ -190,7 +157,8 @@ bool CopyPropagation::run() {
NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG);
assert(DFG.IsCode<NodeAttrs::Stmt>(IA));
- if (RDefSR[IA.Id] != RDefSR_SA)
+ NodeId AtUse = getLocalReachingDef(SR, IA);
+ if (AtCopy != AtUse)
continue;
MachineOperand &Op = UA.Addr->getOp();
@@ -206,8 +174,8 @@ bool CopyPropagation::run() {
Op.setReg(NewReg);
Op.setSubReg(0);
DFG.unlinkUse(UA, false);
- if (RDefSR_SA != 0) {
- UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(RDefSR_SA));
+ if (AtCopy != 0) {
+ UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(AtCopy));
} else {
UA.Addr->setReachingDef(0);
UA.Addr->setSibling(0);
diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h
index 5ece11bd5ce4..bbd625c5f5f6 100644
--- a/lib/Target/Hexagon/RDFCopy.h
+++ b/lib/Target/Hexagon/RDFCopy.h
@@ -11,6 +11,9 @@
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
#include <map>
#include <vector>
@@ -24,7 +27,7 @@ namespace rdf {
struct CopyPropagation {
CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
- Trace(false) {}
+ L(dfg.getMF().getRegInfo(), dfg), Trace(false) {}
virtual ~CopyPropagation() = default;
@@ -39,18 +42,16 @@ namespace rdf {
private:
const MachineDominatorTree &MDT;
DataFlowGraph &DFG;
- DataFlowGraph::DefStackMap DefM;
+ Liveness L;
bool Trace;
- // map: register -> (map: stmt -> reaching def)
- std::map<RegisterRef,std::map<NodeId,NodeId>> RDefMap;
// map: statement -> (map: dst reg -> src reg)
std::map<NodeId, EqualityMap> CopyMap;
std::vector<NodeId> Copies;
void recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM);
- void updateMap(NodeAddr<InstrNode*> IA);
bool scanBlock(MachineBasicBlock *B);
+ NodeId getLocalReachingDef(RegisterRef RefRR, NodeAddr<InstrNode*> IA);
};
} // end namespace rdf
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 63177d51cada..9aa8ad68e07e 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -62,9 +62,19 @@ bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const {
return true;
if (MI->isPHI())
return false;
- for (auto &Op : MI->operands())
+ for (auto &Op : MI->operands()) {
if (Op.isReg() && MRI.isReserved(Op.getReg()))
return true;
+ if (Op.isRegMask()) {
+ const uint32_t *BM = Op.getRegMask();
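+      // A clear bit in the regmask means the register is clobbered; if any
+      // reserved register is clobbered, the instruction is live.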
+ for (unsigned R = 0, RN = DFG.getTRI().getNumRegs(); R != RN; ++R) {
+ if (BM[R/32] & (1u << (R%32)))
+ continue;
+ if (MRI.isReserved(R))
+ return true;
+ }
+ }
+ }
return false;
}
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index fa272ea1a76a..7a2895aa4e8c 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -276,7 +276,7 @@ raw_ostream &operator<< (raw_ostream &OS,
MachineBasicBlock *BB = P.Obj.Addr->getCode();
unsigned NP = BB->pred_size();
std::vector<int> Ns;
- auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void {
+ auto PrintBBs = [&OS] (std::vector<int> Ns) -> void {
unsigned N = Ns.size();
for (int I : Ns) {
OS << "BB#" << I;
@@ -424,7 +424,7 @@ RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
return G.unpack(Ref.PR);
assert(Ref.Op != nullptr);
- return G.makeRegRef(Ref.Op->getReg(), Ref.Op->getSubReg());
+ return G.makeRegRef(*Ref.Op);
}
// Set the register reference in the reference node directly (for references
@@ -617,8 +617,12 @@ bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
// Check if the definition of RR produces an unspecified value.
bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
const {
+ const MachineOperand &Op = In.getOperand(OpNum);
+ if (Op.isRegMask())
+ return true;
+ assert(Op.isReg());
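+  // A dead def on a call clobbers the register without defining a usable
+  // value.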
if (In.isCall())
- if (In.getOperand(OpNum).isImplicit())
+ if (Op.isDef() && Op.isDead())
return true;
return false;
}
@@ -654,109 +658,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
return false;
}
-RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
- RegisterId SuperReg = RR.Reg;
- while (true) {
- MCSuperRegIterator SR(SuperReg, &TRI, false);
- if (!SR.isValid())
- break;
- SuperReg = *SR;
- }
-
- const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
- LaneBitmask Common = RR.Mask & RC.LaneMask;
- uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
- LaneBitmask SuperMask = TRI.composeSubRegIndexLaneMask(Sub, Common);
- return RegisterRef(SuperReg, SuperMask);
-}
-
-bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
- RegisterRef NR = normalize(RR);
- auto F = Masks.find(NR.Reg);
- if (F != Masks.end()) {
- if ((F->second & NR.Mask).any())
- return true;
- }
- if (CheckUnits) {
- for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U)
- if (ExpAliasUnits.test(*U))
- return true;
- }
- return false;
-}
-
-bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
- // Always have a cover for empty lane mask.
- RegisterRef NR = normalize(RR);
- if (NR.Mask.none())
- return true;
- auto F = Masks.find(NR.Reg);
- if (F == Masks.end())
- return false;
- return (NR.Mask & F->second) == NR.Mask;
-}
-
-RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
- RegisterRef NR = normalize(RR);
- auto F = Masks.find(NR.Reg);
- if (F == Masks.end())
- Masks.insert({NR.Reg, NR.Mask});
- else
- F->second |= NR.Mask;
-
- // Visit all register units to see if there are any that were created
- // by explicit aliases. Add those that were to the bit vector.
- for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U) {
- MCRegUnitRootIterator R(*U, &TRI);
- ++R;
- if (!R.isValid())
- continue;
- ExpAliasUnits.set(*U);
- CheckUnits = true;
- }
- return *this;
-}
-
-RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) {
- for (std::pair<RegisterId,LaneBitmask> P : RG.Masks)
- insert(RegisterRef(P.first, P.second));
- return *this;
-}
-
-RegisterAggr &RegisterAggr::clear(RegisterRef RR) {
- RegisterRef NR = normalize(RR);
- auto F = Masks.find(NR.Reg);
- if (F == Masks.end())
- return *this;
- LaneBitmask NewM = F->second & ~NR.Mask;
- if (NewM.none())
- Masks.erase(F);
- else
- F->second = NewM;
- return *this;
-}
-
-RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) {
- for (std::pair<RegisterId,LaneBitmask> P : RG.Masks)
- clear(RegisterRef(P.first, P.second));
- return *this;
-}
-
-RegisterRef RegisterAggr::clearIn(RegisterRef RR) const {
- RegisterAggr T(TRI);
- T.insert(RR).clear(*this);
- if (T.empty())
- return RegisterRef();
- return RegisterRef(T.begin()->first, T.begin()->second);
-}
-
-void RegisterAggr::print(raw_ostream &OS) const {
- OS << '{';
- for (auto I : Masks)
- OS << ' ' << PrintReg(I.first, &TRI) << PrintLaneMaskOpt(I.second);
- OS << " }";
-}
-
//
// The data flow graph construction.
//
@@ -764,7 +665,8 @@ void RegisterAggr::print(raw_ostream &OS) const {
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
- : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
+ : MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi),
+ LiveIns(PRI) {
}
// The implementation of the definition stack.
@@ -857,17 +759,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
// Register information.
-// Get the list of references aliased to RR. Lane masks are ignored.
-RegisterSet DataFlowGraph::getAliasSet(RegisterId Reg) const {
- // Do not include RR in the alias set.
- RegisterSet AS;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
-
- for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
- AS.insert(RegisterRef(*AI));
- return AS;
-}
-
RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
RegisterSet LR;
const Function &F = *MF.getFunction();
@@ -1010,11 +901,22 @@ void DataFlowGraph::build(unsigned Options) {
BlockRefsMap RefM;
buildBlockRefs(EA, RefM);
- // Add function-entry phi nodes.
+ // Collect function live-ins and entry block live-ins.
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
+ MachineBasicBlock &EntryB = *EA.Addr->getCode();
+ assert(EntryB.pred_empty() && "Function entry block has predecessors");
+ for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I)
+ LiveIns.insert(RegisterRef(I->first));
+ if (MRI.tracksLiveness()) {
+ for (auto I : EntryB.liveins())
+ LiveIns.insert(RegisterRef(I.PhysReg, I.LaneMask));
+ }
+
+ // Add function-entry phi nodes for the live-in registers.
+ //for (std::pair<RegisterId,LaneBitmask> P : LiveIns) {
+ for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
+ RegisterRef RR = *I;
NodeAddr<PhiNode*> PA = newPhi(EA);
- RegisterRef RR = RegisterRef(I->first);
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
PA.Addr->addMember(DA, *this);
@@ -1071,27 +973,19 @@ void DataFlowGraph::build(unsigned Options) {
}
RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(PhysicalRegisterInfo::isRegMaskId(Reg) ||
+ TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Reg != 0);
if (Sub != 0)
Reg = TRI.getSubReg(Reg, Sub);
return RegisterRef(Reg);
}
-RegisterRef DataFlowGraph::normalizeRef(RegisterRef RR) const {
- // FIXME copied from RegisterAggr
- RegisterId SuperReg = RR.Reg;
- while (true) {
- MCSuperRegIterator SR(SuperReg, &TRI, false);
- if (!SR.isValid())
- break;
- SuperReg = *SR;
- }
-
- uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
- const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
- LaneBitmask SuperMask = RR.Mask &
- TRI.composeSubRegIndexLaneMask(Sub, RC.LaneMask);
- return RegisterRef(SuperReg, SuperMask);
+RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
+ assert(Op.isReg() || Op.isRegMask());
+ if (Op.isReg())
+ return makeRegRef(Op.getReg(), Op.getSubReg());
+ return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll());
}
RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
@@ -1100,13 +994,13 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
}
#ifndef NDEBUG
- RegisterRef NAR = normalizeRef(AR);
- RegisterRef NBR = normalizeRef(BR);
- assert(NAR.Reg != NBR.Reg);
+// RegisterRef NAR = PRI.normalize(AR);
+// RegisterRef NBR = PRI.normalize(BR);
+// assert(NAR.Reg != NBR.Reg);
#endif
// This isn't strictly correct, because the overlap may happen in the
// part masked out.
- if (TRI.regsOverlap(AR.Reg, BR.Reg))
+ if (PRI.alias(AR, BR))
return AR;
return RegisterRef();
}
@@ -1137,11 +1031,61 @@ void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
+void DataFlowGraph::pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+ pushClobbers(IA, DefM);
+ pushDefs(IA, DefM);
+}
+
+// Push all clobbering definitions from the instruction node IA onto the
+// appropriate stacks in DefM.
+void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+ NodeSet Visited;
+ std::set<RegisterId> Defined;
+
+ // The important objectives of this function are:
+ // - to be able to handle instructions both while the graph is being
+ // constructed, and after the graph has been constructed, and
+ // - maintain proper ordering of definitions on the stack for each
+ // register reference:
+ // - if there are two or more related defs in IA (i.e. coming from
+ // the same machine operand), then only push one def on the stack,
+ // - if there are multiple unrelated defs of non-overlapping
+ // subregisters of S, then the stack for S will have both (in an
+ // unspecified order), but the order does not matter from the data-
+ // -flow perspective.
+
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
+ if (Visited.count(DA.Id))
+ continue;
+ if (!(DA.Addr->getFlags() & NodeAttrs::Clobbering))
+ continue;
+
+ NodeList Rel = getRelatedRefs(IA, DA);
+ NodeAddr<DefNode*> PDA = Rel.front();
+ RegisterRef RR = PDA.Addr->getRegRef(*this);
+
+ // Push the definition on the stack for the register and all aliases.
+ // The def stack traversal in linkNodeUp will check the exact aliasing.
+ DefM[RR.Reg].push(DA);
+ Defined.insert(RR.Reg);
+ for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
+ // Check that we don't push the same def twice.
+ assert(A != RR.Reg);
+ if (!Defined.count(A))
+ DefM[A].push(DA);
+ }
+ // Mark all the related defs as visited.
+ for (NodeAddr<NodeBase*> T : Rel)
+ Visited.insert(T.Id);
+ }
+}
+
+// Push all non-clobbering definitions from the instruction node IA onto the
+// appropriate stacks in DefM.
void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
- NodeList Defs = IA.Addr->members_if(IsDef, *this);
NodeSet Visited;
#ifndef NDEBUG
- RegisterSet Defined;
+ std::set<RegisterId> Defined;
#endif
// The important objectives of this function are:
@@ -1156,9 +1100,11 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// unspecified order), but the order does not matter from the data-
// -flow perspective.
- for (NodeAddr<DefNode*> DA : Defs) {
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
if (Visited.count(DA.Id))
continue;
+ if (DA.Addr->getFlags() & NodeAttrs::Clobbering)
+ continue;
NodeList Rel = getRelatedRefs(IA, DA);
NodeAddr<DefNode*> PDA = Rel.front();
@@ -1166,7 +1112,7 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
#ifndef NDEBUG
// Assert if the register is defined in two or more unrelated defs.
// This could happen if there are two or more def operands defining it.
- if (!Defined.insert(RR).second) {
+ if (!Defined.insert(RR.Reg).second) {
MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
dbgs() << "Multiple definitions of register: "
<< Print<RegisterRef>(RR, *this) << " in\n " << *MI
@@ -1177,10 +1123,10 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// Push the definition on the stack for the register and all aliases.
// The def stack traversal in linkNodeUp will check the exact aliasing.
DefM[RR.Reg].push(DA);
- for (RegisterRef A : getAliasSet(RR.Reg /*FIXME? use RegisterRef*/)) {
+ for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
// Check that we don't push the same def twice.
- assert(A != RR);
- DefM[A.Reg].push(DA);
+ assert(A != RR.Reg);
+ DefM[A].push(DA);
}
// Mark all the related defs as visited.
for (NodeAddr<NodeBase*> T : Rel)
@@ -1203,59 +1149,6 @@ NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
return Refs;
}
-// Return true if RA and RB overlap, false otherwise.
-bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
- assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg));
- assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg));
-
- MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
- MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
- // Reg units are returned in the numerical order.
- while (UMA.isValid() && UMB.isValid()) {
- std::pair<uint32_t,LaneBitmask> PA = *UMA;
- std::pair<uint32_t,LaneBitmask> PB = *UMB;
- if (PA.first == PB.first) {
- // Lane mask of 0 (given by the iterator) should be treated as "full".
- // This can happen when the register has only one unit, or when the
- // unit corresponds to explicit aliasing. In such cases, the lane mask
- // from RegisterRef should be ignored.
- if (PA.second.none() || PB.second.none())
- return true;
-
- // At this point the common unit corresponds to a subregister. The lane
- // masks correspond to the lane mask of that unit within the original
- // register, for example assuming register quadruple q0 = r3:0, and
- // a register pair d1 = r3:2, the lane mask of r2 in q0 may be 0b0100,
- // while the lane mask of r2 in d1 may be 0b0001.
- LaneBitmask LA = PA.second & RA.Mask;
- LaneBitmask LB = PB.second & RB.Mask;
- if (LA.any() && LB.any()) {
- unsigned Root = *MCRegUnitRootIterator(PA.first, &TRI);
- // If register units were guaranteed to only have 1 bit in any lane
- // mask, the code below would not be necessary. This is because LA
- // and LB would have at most 1 bit set each, and that bit would be
- // guaranteed to correspond to the given register unit.
- uint32_t SubA = TRI.getSubRegIndex(RA.Reg, Root);
- uint32_t SubB = TRI.getSubRegIndex(RB.Reg, Root);
- const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(Root);
- LaneBitmask MaskA = TRI.reverseComposeSubRegIndexLaneMask(SubA, LA);
- LaneBitmask MaskB = TRI.reverseComposeSubRegIndexLaneMask(SubB, LB);
- if ((MaskA & MaskB & RC.LaneMask).any())
- return true;
- }
-
- ++UMA;
- ++UMB;
- continue;
- }
- if (PA.first < PB.first)
- ++UMA;
- else if (PB.first < PA.first)
- ++UMB;
- }
- return false;
-}
-
// Clear all information in the graph.
void DataFlowGraph::reset() {
Memory.clear();
@@ -1370,58 +1263,53 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (In.isCall())
return true;
// Is tail call?
- if (In.isBranch())
+ if (In.isBranch()) {
for (const MachineOperand &Op : In.operands())
if (Op.isGlobal() || Op.isSymbol())
return true;
+ // Assume indirect branches are calls. This is for the purpose of
+ // keeping implicit operands, and so it won't hurt on intra-function
+ // indirect branches.
+ if (In.isIndirectBranch())
+ return true;
+ }
return false;
};
auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool {
// This instruction defines DR. Check if there is a use operand that
// would make DR live on entry to the instruction.
- for (const MachineOperand &UseOp : In.operands()) {
- if (!UseOp.isReg() || !UseOp.isUse() || UseOp.isUndef())
+ for (const MachineOperand &Op : In.operands()) {
+ if (!Op.isReg() || Op.getReg() == 0 || !Op.isUse() || Op.isUndef())
continue;
- RegisterRef UR = makeRegRef(UseOp.getReg(), UseOp.getSubReg());
- if (alias(DR, UR))
+ RegisterRef UR = makeRegRef(Op);
+ if (PRI.alias(DR, UR))
return false;
}
return true;
};
- // Collect a set of registers that this instruction implicitly uses
- // or defines. Implicit operands from an instruction will be ignored
- // unless they are listed here.
- RegisterSet ImpUses, ImpDefs;
- if (const uint16_t *ImpD = In.getDesc().getImplicitDefs())
- while (uint16_t R = *ImpD++)
- ImpDefs.insert(RegisterRef(R));
- if (const uint16_t *ImpU = In.getDesc().getImplicitUses())
- while (uint16_t R = *ImpU++)
- ImpUses.insert(RegisterRef(R));
-
bool IsCall = isCall(In);
- bool NeedsImplicit = IsCall || In.isInlineAsm() || In.isReturn();
- bool IsPredicated = TII.isPredicated(In);
unsigned NumOps = In.getNumOperands();
// Avoid duplicate implicit defs. This will not detect cases of implicit
// defs that define registers that overlap, but it is not clear how to
// interpret that in the absence of explicit defs. Overlapping explicit
// defs are likely illegal already.
- RegisterSet DoneDefs;
+ BitVector DoneDefs(TRI.getNumRegs());
// Process explicit defs first.
for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
- RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
+ unsigned R = Op.getReg();
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R))
+ continue;
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
Flags |= NodeAttrs::Preserving;
// If the def is preserving, check if it is also undefined.
- if (isDefUndef(In, RR))
+ if (isDefUndef(In, makeRegRef(Op)))
Flags |= NodeAttrs::Undef;
}
if (TOI.isClobbering(In, OpN))
@@ -1432,7 +1320,25 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
Flags |= NodeAttrs::Dead;
NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
- DoneDefs.insert(RR);
+ assert(!DoneDefs.test(R));
+ DoneDefs.set(R);
+ }
+
+ // Process reg-masks (as clobbers).
+ BitVector DoneClobbers(TRI.getNumRegs());
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isRegMask())
+ continue;
+ uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed |
+ NodeAttrs::Dead;
+ NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+    // Record all clobbered registers in DoneClobbers (a clear bit in the
+    // regmask means the register is clobbered).
+ const uint32_t *RM = Op.getRegMask();
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i)
+ if (!(RM[i/32] & (1u << (i%32))))
+ DoneClobbers.set(i);
}
// Process implicit defs, skipping those that have already been added
@@ -1441,11 +1347,10 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
continue;
- RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
- if (!NeedsImplicit && !ImpDefs.count(RR))
- continue;
- if (DoneDefs.count(RR))
+ unsigned R = Op.getReg();
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R) || DoneDefs.test(R))
continue;
+ RegisterRef RR = makeRegRef(Op);
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
Flags |= NodeAttrs::Preserving;
@@ -1457,24 +1362,22 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
Flags |= NodeAttrs::Clobbering;
if (TOI.isFixedReg(In, OpN))
Flags |= NodeAttrs::Fixed;
- if (IsCall && Op.isDead())
+ if (IsCall && Op.isDead()) {
+ if (DoneClobbers.test(R))
+ continue;
Flags |= NodeAttrs::Dead;
+ }
NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
- DoneDefs.insert(RR);
+ DoneDefs.set(R);
}
for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isUse())
continue;
- RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
- // Add implicit uses on return and call instructions, and on predicated
- // instructions regardless of whether or not they appear in the instruction
- // descriptor's list.
- bool Implicit = Op.isImplicit();
- bool TakeImplicit = NeedsImplicit || IsPredicated;
- if (Implicit && !TakeImplicit && !ImpUses.count(RR))
+ unsigned R = Op.getReg();
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R))
continue;
uint16_t Flags = NodeAttrs::None;
if (Op.isUndef())
@@ -1570,7 +1473,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
for (RegisterRef I : RRs)
- if (I != RR && RegisterAggr::isCoverOf(I, RR, TRI))
+ if (I != RR && RegisterAggr::isCoverOf(I, RR, PRI))
RR = I;
return RR;
};
@@ -1597,7 +1500,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
auto Aliased = [this,&MaxRefs](RegisterRef RR,
std::vector<unsigned> &Closure) -> bool {
for (unsigned I : Closure)
- if (alias(RR, MaxRefs[I]))
+ if (PRI.alias(RR, MaxRefs[I]))
return true;
return false;
};
@@ -1708,7 +1611,7 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
NodeAddr<T> TAP;
// References from the def stack that have been examined so far.
- RegisterAggr Defs(TRI);
+ RegisterAggr Defs(PRI);
for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
RegisterRef QR = I->Addr->getRegRef(*this);
@@ -1744,13 +1647,15 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
}
// Create data-flow links for all reference nodes in the statement node SA.
-void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) {
+template <typename Predicate>
+void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
+ Predicate P) {
#ifndef NDEBUG
RegisterSet Defs;
#endif
// Link all nodes (upwards in the data-flow) with their reaching defs.
- for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) {
+ for (NodeAddr<RefNode*> RA : SA.Addr->members_if(P, *this)) {
uint16_t Kind = RA.Addr->getKind();
assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
RegisterRef RR = RA.Addr->getRegRef(*this);
@@ -1779,6 +1684,13 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
// Push block delimiters.
markBlock(BA.Id, DefM);
+ auto IsClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ return IsDef(RA) && (RA.Addr->getFlags() & NodeAttrs::Clobbering);
+ };
+ auto IsNoClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ return IsDef(RA) && !(RA.Addr->getFlags() & NodeAttrs::Clobbering);
+ };
+
assert(BA.Addr && "block node address is needed to create a data-flow link");
// For each non-phi instruction in the block, link all the defs and uses
// to their reaching defs. For any member of the block (including phis),
@@ -1786,10 +1698,17 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) {
// Ignore phi nodes here. They will be linked part by part from the
// predecessors.
- if (IA.Addr->getKind() == NodeAttrs::Stmt)
- linkStmtRefs(DefM, IA);
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) {
+ linkStmtRefs(DefM, IA, IsUse);
+ linkStmtRefs(DefM, IA, IsClobber);
+ }
// Push the definitions on the stack.
+ pushClobbers(IA, DefM);
+
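+    // Non-clobbering defs are linked after the clobbers have been pushed,
+    // so their reaching-def chains can include the clobbering defs from the
+    // same instruction.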
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ linkStmtRefs(DefM, IA, IsNoClobber);
+
pushDefs(IA, DefM);
}
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
index 49d78a8b22b5..d5faca4cd6f4 100644
--- a/lib/Target/Hexagon/RDFGraph.h
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -225,6 +225,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/LaneBitmask.h"
@@ -260,7 +261,6 @@ namespace llvm {
namespace rdf {
typedef uint32_t NodeId;
- typedef uint32_t RegisterId;
struct DataFlowGraph;
@@ -412,25 +412,6 @@ namespace rdf {
AllocatorTy MemPool;
};
- struct RegisterRef {
- RegisterId Reg;
- LaneBitmask Mask;
-
- RegisterRef() : RegisterRef(0) {}
- explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
- : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
-
- operator bool() const { return Reg != 0 && Mask.any(); }
- bool operator== (const RegisterRef &RR) const {
- return Reg == RR.Reg && Mask == RR.Mask;
- }
- bool operator!= (const RegisterRef &RR) const {
- return !operator==(RR);
- }
- bool operator< (const RegisterRef &RR) const {
- return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
- }
- };
typedef std::set<RegisterRef> RegisterSet;
struct TargetOperandInfo {
@@ -450,39 +431,6 @@ namespace rdf {
uint32_t MaskId;
};
- // Template class for a map translating uint32_t into arbitrary types.
- // The map will act like an indexed set: upon insertion of a new object,
- // it will automatically assign a new index to it. Index of 0 is treated
- // as invalid and is never allocated.
- template <typename T, unsigned N = 32>
- struct IndexedSet {
- IndexedSet() : Map() { Map.reserve(N); }
-
- T get(uint32_t Idx) const {
- // Index Idx corresponds to Map[Idx-1].
- assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
- return Map[Idx-1];
- }
-
- uint32_t insert(T Val) {
- // Linear search.
- auto F = llvm::find(Map, Val);
- if (F != Map.end())
- return F - Map.begin() + 1;
- Map.push_back(Val);
- return Map.size(); // Return actual_index + 1.
- }
-
- uint32_t find(T Val) const {
- auto F = llvm::find(Map, Val);
- assert(F != Map.end());
- return F - Map.begin();
- }
-
- private:
- std::vector<T> Map;
- };
-
struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
LaneMaskIndex() = default;
@@ -497,55 +445,6 @@ namespace rdf {
assert(LM.any());
return LM.all() ? 0 : find(LM);
}
-
- PackedRegisterRef pack(RegisterRef RR) {
- return { RR.Reg, getIndexForLaneMask(RR.Mask) };
- }
- PackedRegisterRef pack(RegisterRef RR) const {
- return { RR.Reg, getIndexForLaneMask(RR.Mask) };
- }
-
- RegisterRef unpack(PackedRegisterRef PR) const {
- return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
- }
- };
-
- struct RegisterAggr {
- RegisterAggr(const TargetRegisterInfo &tri)
- : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
- RegisterAggr(const RegisterAggr &RG) = default;
-
- bool empty() const { return Masks.empty(); }
- bool hasAliasOf(RegisterRef RR) const;
- bool hasCoverOf(RegisterRef RR) const;
- static bool isCoverOf(RegisterRef RA, RegisterRef RB,
- const TargetRegisterInfo &TRI) {
- return RegisterAggr(TRI).insert(RA).hasCoverOf(RB);
- }
-
- RegisterAggr &insert(RegisterRef RR);
- RegisterAggr &insert(const RegisterAggr &RG);
- RegisterAggr &clear(RegisterRef RR);
- RegisterAggr &clear(const RegisterAggr &RG);
-
- RegisterRef clearIn(RegisterRef RR) const;
-
- void print(raw_ostream &OS) const;
-
- private:
- typedef std::unordered_map<RegisterId, LaneBitmask> MapType;
-
- public:
- typedef MapType::const_iterator iterator;
- iterator begin() const { return Masks.begin(); }
- iterator end() const { return Masks.end(); }
- RegisterRef normalize(RegisterRef RR) const;
-
- private:
- MapType Masks;
- BitVector ExpAliasUnits; // Register units for explicit aliases.
- bool CheckUnits;
- const TargetRegisterInfo &TRI;
};
struct NodeBase {
@@ -761,8 +660,10 @@ namespace rdf {
MachineFunction &getMF() const { return MF; }
const TargetInstrInfo &getTII() const { return TII; }
const TargetRegisterInfo &getTRI() const { return TRI; }
+ const PhysicalRegisterInfo &getPRI() const { return PRI; }
const MachineDominatorTree &getDT() const { return MDT; }
const MachineDominanceFrontier &getDF() const { return MDF; }
+ const RegisterAggr &getLiveIns() const { return LiveIns; }
struct DefStack {
DefStack() = default;
@@ -828,15 +729,22 @@ namespace rdf {
typedef std::unordered_map<RegisterId,DefStack> DefStackMap;
void build(unsigned Options = BuildOptions::None);
- void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
void markBlock(NodeId B, DefStackMap &DefM);
void releaseBlock(NodeId B, DefStackMap &DefM);
- PackedRegisterRef pack(RegisterRef RR) { return LMI.pack(RR); }
- PackedRegisterRef pack(RegisterRef RR) const { return LMI.pack(RR); }
- RegisterRef unpack(PackedRegisterRef PR) const { return LMI.unpack(PR); }
+ PackedRegisterRef pack(RegisterRef RR) {
+ return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
+ }
+ PackedRegisterRef pack(RegisterRef RR) const {
+ return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
+ }
+ RegisterRef unpack(PackedRegisterRef PR) const {
+ return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId));
+ }
+
RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
- RegisterRef normalizeRef(RegisterRef RR) const;
+ RegisterRef makeRegRef(const MachineOperand &Op) const;
RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
@@ -853,6 +761,10 @@ namespace rdf {
NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
NodeAddr<RefNode*> RA) const;
+ NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const {
+ return BlockNodes.at(BB);
+ }
+
void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) {
unlinkUseDF(UA);
if (RemoveFromOwner)
@@ -898,13 +810,9 @@ namespace rdf {
return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
}
- // Register aliasing.
- bool alias(RegisterRef RA, RegisterRef RB) const;
-
private:
void reset();
- RegisterSet getAliasSet(RegisterId Reg) const;
RegisterSet getLandingPadLiveIns() const;
NodeAddr<NodeBase*> newNode(uint16_t Attrs);
@@ -940,9 +848,12 @@ namespace rdf {
NodeAddr<BlockNode*> BA);
void removeUnusedPhis();
+ void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
NodeAddr<T> TA, DefStack &DS);
- void linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA);
+ template <typename Predicate> void linkStmtRefs(DefStackMap &DefM,
+ NodeAddr<StmtNode*> SA, Predicate P);
void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
void unlinkUseDF(NodeAddr<UseNode*> UA);
@@ -953,23 +864,21 @@ namespace rdf {
IA.Addr->removeMember(RA, *this);
}
- NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) {
- return BlockNodes[BB];
- }
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo PRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const TargetOperandInfo &TOI;
+ RegisterAggr LiveIns;
NodeAddr<FuncNode*> Func;
NodeAllocator Memory;
// Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
// Lane mask map.
LaneMaskIndex LMI;
-
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- const TargetOperandInfo &TOI;
}; // struct DataFlowGraph
template <typename Predicate>
@@ -1013,12 +922,6 @@ namespace rdf {
return MM;
}
- // Optionally print the lane mask, if it is not ~0.
- struct PrintLaneMaskOpt {
- PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
- LaneBitmask Mask;
- };
- raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
template <typename T> struct Print;
template <typename T>
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index e74c4bfc1645..b0532f933b16 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -31,11 +31,15 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
using namespace rdf;
+static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
+ cl::Hidden, cl::desc("Maximum recursion level"));
+
namespace llvm {
namespace rdf {
template<>
@@ -85,7 +89,8 @@ namespace rdf {
// the data-flow.
NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, bool FullChain, const RegisterAggr &DefRRs) {
+ NodeAddr<RefNode*> RefA, bool TopShadows, bool FullChain,
+ const RegisterAggr &DefRRs) {
NodeList RDefs; // Return value.
SetVector<NodeId> DefQ;
SetVector<NodeId> Owners;
@@ -105,6 +110,11 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
auto SNA = DFG.addr<RefNode*>(Start);
if (NodeId RD = SNA.Addr->getReachingDef())
DefQ.insert(RD);
+ if (TopShadows) {
+ for (auto S : DFG.getRelatedRefs(RefA.Addr->getOwner(DFG), RefA))
+ if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ DefQ.insert(RD);
+ }
// Collect all the reaching defs, going up until a phi node is encountered,
// or there are no more reaching defs. From this set, the actual set of
@@ -119,7 +129,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// Stop at the covering/overwriting def of the initial register reference.
RegisterRef RR = TA.Addr->getRegRef(DFG);
if (!DFG.IsPreservingDef(TA))
- if (RegisterAggr::isCoverOf(RR, RefRR, TRI))
+ if (RegisterAggr::isCoverOf(RR, RefRR, PRI))
continue;
// Get the next level of reaching defs. This will include multiple
// reaching defs for shadows.
@@ -134,7 +144,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
for (NodeId N : DefQ) {
auto TA = DFG.addr<DefNode*>(N);
bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
- if (!IsPhi && !DFG.alias(RefRR, TA.Addr->getRegRef(DFG)))
+ if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
continue;
Defs.insert(TA.Id);
Owners.insert(TA.Addr->getOwner(DFG).Id);
@@ -241,20 +251,30 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
}
-NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs) {
+std::pair<NodeSet,bool>
+Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
+ NodeSet &Visited, const NodeSet &Defs) {
+ return getAllReachingDefsRecImpl(RefRR, RefA, Visited, Defs, 0, MaxRecNest);
+}
+
+
+std::pair<NodeSet,bool>
+Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
+ NodeSet &Visited, const NodeSet &Defs, unsigned Nest, unsigned MaxNest) {
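+  // The boolean part of the returned pair indicates whether the recursion
+  // completed without exceeding MaxNest.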
+ if (Nest > MaxNest)
+ return { NodeSet(), false };
// Collect all defined registers. Do not consider phis to be defining
// anything, only collect "real" definitions.
- RegisterAggr DefRRs(TRI);
+ RegisterAggr DefRRs(PRI);
for (NodeId D : Defs) {
const auto DA = DFG.addr<const DefNode*>(D);
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
DefRRs.insert(DA.Addr->getRegRef(DFG));
}
- NodeList RDs = getAllReachingDefs(RefRR, RefA, true, DefRRs);
+ NodeList RDs = getAllReachingDefs(RefRR, RefA, false, true, DefRRs);
if (RDs.empty())
- return Defs;
+ return { Defs, true };
// Make a copy of the preexisting definitions and add the newly found ones.
NodeSet TmpDefs = Defs;
@@ -273,12 +293,74 @@ NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
Visited.insert(PA.Id);
// Go over all phi uses and get the reaching defs for each use.
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
- const auto &T = getAllReachingDefsRec(RefRR, U, Visited, TmpDefs);
- Result.insert(T.begin(), T.end());
+ const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
+ Nest+1, MaxNest);
+ if (!T.second)
+ return { T.first, false };
+ Result.insert(T.first.begin(), T.first.end());
}
}
- return Result;
+ return { Result, true };
+}
+
+/// Find the nearest ref node aliased to RefRR, going upwards in the data
+/// flow, starting from the instruction immediately preceding IA.
+NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode*> IA) {
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ NodeList Ins = BA.Addr->members(DFG);
+ NodeId FindId = IA.Id;
+ auto E = Ins.rend();
+ auto B = std::find_if(Ins.rbegin(), E,
+ [FindId] (const NodeAddr<InstrNode*> T) {
+ return T.Id == FindId;
+ });
+ // Do not scan IA (which is what B would point to).
+ if (B != E)
+ ++B;
+
+ do {
+ // Process the range of instructions from B to E.
+ for (NodeAddr<InstrNode*> I : make_range(B, E)) {
+ NodeList Refs = I.Addr->members(DFG);
+ NodeAddr<RefNode*> Clob, Use;
+ // Scan all the refs in I aliased to RefRR, and return the one that
+ // is the closest to the output of I, i.e. def > clobber > use.
+ for (NodeAddr<RefNode*> R : Refs) {
+ if (!PRI.alias(R.Addr->getRegRef(DFG), RefRR))
+ continue;
+ if (DFG.IsDef(R)) {
+ // If it's a non-clobbering def, just return it.
+ if (!(R.Addr->getFlags() & NodeAttrs::Clobbering))
+ return R;
+ Clob = R;
+ } else {
+ Use = R;
+ }
+ }
+ if (Clob.Id != 0)
+ return Clob;
+ if (Use.Id != 0)
+ return Use;
+ }
+
+ // Go up to the immediate dominator, if any.
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ BA = NodeAddr<BlockNode*>();
+ if (MachineDomTreeNode *N = MDT.getNode(BB)) {
+ if ((N = N->getIDom()))
+ BA = DFG.findBlock(N->getBlock());
+ }
+ if (!BA.Id)
+ break;
+
+ Ins = BA.Addr->members(DFG);
+ B = Ins.rbegin();
+ E = Ins.rend();
+ } while (true);
+
+ return NodeAddr<RefNode*>();
}
@@ -299,7 +381,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
auto UA = DFG.addr<UseNode*>(U);
if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
RegisterRef UR = UA.Addr->getRegRef(DFG);
- if (DFG.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
+ if (PRI.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
Uses.insert(U);
}
U = UA.Addr->getSibling();
@@ -312,7 +394,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
RegisterRef DR = DA.Addr->getRegRef(DFG);
// If this def is already covered, it cannot reach anything new.
// Similarly, skip it if it is not aliased to the interesting register.
- if (DefRRs.hasCoverOf(DR) || !DFG.alias(RefRR, DR))
+ if (DefRRs.hasCoverOf(DR) || !PRI.alias(RefRR, DR))
continue;
NodeSet T;
if (DFG.IsPreservingDef(DA)) {
@@ -343,6 +425,7 @@ void Liveness::computePhiInfo() {
// phi use -> (map: reaching phi -> set of registers defined in between)
std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
+ std::map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
// Go over all phis.
for (NodeAddr<PhiNode*> PhiA : Phis) {
@@ -355,12 +438,15 @@ void Liveness::computePhiInfo() {
// For each def, add to the queue all reached (non-phi) defs.
SetVector<NodeId> DefQ;
NodeSet PhiDefs;
+ RegisterAggr DRs(PRI);
for (NodeAddr<RefNode*> R : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Def>(R))
continue;
+ DRs.insert(R.Addr->getRegRef(DFG));
DefQ.insert(R.Id);
PhiDefs.insert(R.Id);
}
+ PhiDRs.insert(std::make_pair(PhiA.Id, DRs));
// Collect the super-set of all possible reached uses. This set will
// contain all uses reached from this phi, either directly from the
@@ -377,9 +463,9 @@ void Liveness::computePhiInfo() {
NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
uint16_t F = A.Addr->getFlags();
if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
- RegisterRef R = DFG.normalizeRef(getRestrictedRegRef(A));
+ RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG));
RealUses[R.Reg].insert({A.Id,R.Mask});
- }
+ }
UN = A.Addr->getSibling();
}
// Visit all reached defs, and add them to the queue. These defs may
@@ -424,17 +510,13 @@ void Liveness::computePhiInfo() {
auto UA = DFG.addr<UseNode*>(I->first);
// Undef flag is checked above.
assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
- RegisterRef R(UI->first, I->second);
+ RegisterRef R(UI->first, I->second);
NodeList RDs = getAllReachingDefs(R, UA);
- if (any_of(RDs, InPhiDefs))
- ++I;
- else
- I = Uses.erase(I);
+ // If none of the reaching defs of R are from this phi, remove this
+ // use of R.
+ I = any_of(RDs, InPhiDefs) ? std::next(I) : Uses.erase(I);
}
- if (Uses.empty())
- UI = RealUses.erase(UI);
- else
- ++UI;
+ UI = Uses.empty() ? RealUses.erase(UI) : std::next(UI);
}
// If this phi reaches some "real" uses, add it to the queue for upward
@@ -452,32 +534,29 @@ void Liveness::computePhiInfo() {
for (auto I : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
continue;
- NodeAddr<UseNode*> UA = I;
-
- // Given a phi use UA, traverse all related phi uses (including UA).
- // The related phi uses may reach different phi nodes or may reach the
- // same phi node. If multiple uses reach the same phi P, the intervening
- // defs must be accumulated for all such uses. To group all such uses
- // into one set, map their node ids to the first use id that reaches P.
- std::map<NodeId,NodeId> FirstUse; // Phi reached up -> first phi use.
-
- for (NodeAddr<UseNode*> VA : DFG.getRelatedRefs(PhiA, UA)) {
- SeenUses.insert(VA.Id);
- RegisterAggr DefRRs(TRI);
- for (NodeAddr<DefNode*> DA : getAllReachingDefs(VA)) {
- if (DA.Addr->getFlags() & NodeAttrs::PhiRef) {
- NodeId RP = DA.Addr->getOwner(DFG).Id;
- NodeId FU = FirstUse.insert({RP,VA.Id}).first->second;
- std::map<NodeId,RegisterAggr> &M = PhiUp[FU];
- auto F = M.find(RP);
- if (F == M.end())
- M.insert(std::make_pair(RP, DefRRs));
- else
- F->second.insert(DefRRs);
- }
- DefRRs.insert(DA.Addr->getRegRef(DFG));
+ NodeAddr<PhiUseNode*> PUA = I;
+ if (PUA.Addr->getReachingDef() == 0)
+ continue;
+
+ RegisterRef UR = PUA.Addr->getRegRef(DFG);
+ NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs);
+ RegisterAggr DefRRs(PRI);
+
+ for (NodeAddr<DefNode*> D : Ds) {
+ if (D.Addr->getFlags() & NodeAttrs::PhiRef) {
+ NodeId RP = D.Addr->getOwner(DFG).Id;
+ std::map<NodeId,RegisterAggr> &M = PhiUp[PUA.Id];
+ auto F = M.find(RP);
+ if (F == M.end())
+ M.insert(std::make_pair(RP, DefRRs));
+ else
+ F->second.insert(DefRRs);
}
+ DefRRs.insert(D.Addr->getRegRef(DFG));
}
+
+ for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PhiA, PUA))
+ SeenUses.insert(T.Id);
}
}
@@ -522,7 +601,7 @@ void Liveness::computePhiInfo() {
for (NodeAddr<UseNode*> UA : PUs) {
std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
- RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(UA));
+ RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG));
for (const std::pair<NodeId,RegisterAggr> &P : PUM) {
bool Changed = false;
const RegisterAggr &MidDefs = P.second;
@@ -540,14 +619,19 @@ void Liveness::computePhiInfo() {
// then add (R-MidDefs,U) to RealUseMap[P]
//
for (const std::pair<RegisterId,NodeRefSet> &T : RUM) {
- RegisterRef R = DFG.restrictRef(RegisterRef(T.first), UR);
- if (!R)
+ RegisterRef R(T.first);
+ // The current phi (PA) could be a phi for a regmask. It could
+ // reach a whole variety of uses that are not related to the
+ // specific upward phi (P.first).
+ const RegisterAggr &DRs = PhiDRs.at(P.first);
+ if (!DRs.hasAliasOf(R))
continue;
+ R = DRs.intersectWith(R);
for (std::pair<NodeId,LaneBitmask> V : T.second) {
- RegisterRef S = DFG.restrictRef(RegisterRef(R.Reg, V.second), R);
- if (!S)
+ LaneBitmask M = R.Mask & V.second;
+ if (M.none())
continue;
- if (RegisterRef SS = MidDefs.clearIn(S)) {
+ if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) {
NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
Changed |= RS.insert({V.first,SS.Mask}).second;
}
@@ -645,30 +729,43 @@ void Liveness::computeLiveIns() {
if (RUs.empty())
continue;
+ NodeSet SeenUses;
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ if (!SeenUses.insert(U.Id).second)
+ continue;
NodeAddr<PhiUseNode*> PUA = U;
if (PUA.Addr->getReachingDef() == 0)
continue;
- // Mark all reached "real" uses of P as live on exit in the
- // predecessor.
- // Remap all the RUs so that they have a correct reaching def.
+ // Each phi has some set (possibly empty) of reached "real" uses,
+ // that is, uses that are part of the compiled program. Such a use
+ // may be located in some farther block, but following a chain of
+ // reaching defs will eventually lead to this phi.
+ // Any chain of reaching defs may fork at a phi node, but there
+ // will be a path upwards that will lead to this phi. Now, this
+ // chain will need to fork at this phi, since some of the reached
+ // uses may have definitions joining in from multiple predecessors.
+ // For each reached "real" use, identify the set of reaching defs
+ // coming from each predecessor P, and add them to PhiLOX[P].
+ //
auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor());
RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
- RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(PUA));
- for (const std::pair<RegisterId,NodeRefSet> &T : RUs) {
- // Check if T.first aliases UR?
- LaneBitmask M;
- for (std::pair<NodeId,LaneBitmask> P : T.second)
- M |= P.second;
-
- RegisterRef S = DFG.restrictRef(RegisterRef(T.first, M), UR);
- if (!S)
- continue;
- for (NodeAddr<DefNode*> D : getAllReachingDefs(S, PUA))
- LOX[S.Reg].insert({D.Id, S.Mask});
+ for (const std::pair<RegisterId,NodeRefSet> &RS : RUs) {
+ // We need to visit each individual use.
+ for (std::pair<NodeId,LaneBitmask> P : RS.second) {
+ // Create a register ref corresponding to the use, and find
+ // all reaching defs starting from the phi use, and treating
+ // all related shadows as a single use cluster.
+ RegisterRef S(RS.first, P.second);
+ NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs);
+ for (NodeAddr<DefNode*> D : Ds)
+ LOX[S.Reg].insert({D.Id, S.Mask});
+ }
}
+
+ for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PA, PUA))
+ SeenUses.insert(T.Id);
} // for U : phi uses
} // for P : Phis
} // for B : Blocks
@@ -684,9 +781,7 @@ void Liveness::computeLiveIns() {
traverse(&MF.front(), LiveIn);
// Add function live-ins to the live-in set of the function entry block.
- auto &EntryIn = LiveMap[&MF.front()];
- for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I)
- EntryIn.insert(RegisterRef(I->first));
+ LiveMap[&MF.front()].insert(DFG.getLiveIns());
if (Trace) {
// Dump the liveness map
@@ -702,19 +797,9 @@ void Liveness::computeLiveIns() {
//dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n';
LV.clear();
- for (std::pair<RegisterId,LaneBitmask> P : LiveMap[&B]) {
- MCSubRegIndexIterator S(P.first, &TRI);
- if (!S.isValid()) {
- LV.push_back(RegisterRef(P.first));
- continue;
- }
- do {
- LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
- if ((M & P.second).any())
- LV.push_back(RegisterRef(S.getSubReg()));
- ++S;
- } while (S.isValid());
- }
+ const RegisterAggr &LG = LiveMap[&B];
+ for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
+ LV.push_back(*I);
std::sort(LV.begin(), LV.end());
dbgs() << "\tcomp = {";
for (auto I : LV)
@@ -735,9 +820,10 @@ void Liveness::resetLiveIns() {
for (auto I : T)
B.removeLiveIn(I);
// Add the newly computed live-ins.
- auto &LiveIns = LiveMap[&B];
- for (auto I : LiveIns) {
- B.addLiveIn({MCPhysReg(I.first), I.second});
+ const RegisterAggr &LiveIns = LiveMap[&B];
+ for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
+ RegisterRef R = *I;
+ B.addLiveIn({MCPhysReg(R.Reg), R.Mask});
}
}
}
@@ -791,7 +877,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
Live.reset(*SR);
}
for (auto &Op : MI->operands()) {
- if (!Op.isReg() || !Op.isUse())
+ if (!Op.isReg() || !Op.isUse() || Op.isUndef())
continue;
unsigned R = Op.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(R))
@@ -803,9 +889,8 @@ void Liveness::resetKills(MachineBasicBlock *B) {
IsLive = true;
break;
}
- if (IsLive)
- continue;
- Op.setIsKill(true);
+ if (!IsLive)
+ Op.setIsKill(true);
for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
Live.set(*SR);
}
@@ -813,17 +898,6 @@ void Liveness::resetKills(MachineBasicBlock *B) {
}
-RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const {
- assert(DFG.IsRef<NodeAttrs::Use>(RA));
- if (RA.Addr->getFlags() & NodeAttrs::Shadow) {
- NodeId RD = RA.Addr->getReachingDef();
- assert(RD);
- RA = DFG.addr<DefNode*>(RD);
- }
- return RA.Addr->getRegRef(DFG);
-}
-
-
// Helper function to obtain the basic block containing the reaching def
// of the given use.
MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const {
@@ -921,7 +995,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// propagated upwards. This only applies to non-preserving defs,
// and to the parts of the register actually covered by those defs.
// (Note that phi defs should always be preserving.)
- RegisterAggr RRs(TRI);
+ RegisterAggr RRs(PRI);
LRef.Mask = OR.second;
if (!DFG.IsPreservingDef(DA)) {
@@ -949,10 +1023,9 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// registers are not covering LRef. The first def from the
// upward chain will be live.
// Subtract all accumulated defs (RRs) from LRef.
- RegisterAggr L(TRI);
- L.insert(LRef).clear(RRs);
- assert(!L.empty());
- NewDefs.insert({TA.Id,L.begin()->second});
+ RegisterRef T = RRs.clearIn(LRef);
+ assert(T);
+ NewDefs.insert({TA.Id,T.Mask});
break;
}
@@ -983,7 +1056,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
if (UA.Addr->getFlags() & NodeAttrs::Undef)
continue;
- RegisterRef RR = DFG.normalizeRef(UA.Addr->getRegRef(DFG));
+ RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG));
for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
if (getBlockWithRef(D.Id) != B)
LiveIn[RR.Reg].insert({D.Id,RR.Mask});
diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h
index c88396f36bbb..6f2615b7c4f3 100644
--- a/lib/Target/Hexagon/RDFLiveness.h
+++ b/lib/Target/Hexagon/RDFLiveness.h
@@ -33,7 +33,7 @@ namespace rdf {
// This is really a std::map, except that it provides a non-trivial
// default constructor to the element accessed via [].
struct LiveMapType {
- LiveMapType(const TargetRegisterInfo &tri) : Empty(tri) {}
+ LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {}
RegisterAggr &operator[] (MachineBasicBlock *B) {
return Map.emplace(B, Empty).first->second;
@@ -49,26 +49,31 @@ namespace rdf {
typedef std::map<RegisterId,NodeRefSet> RefMap;
Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
- : DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()),
- MRI(mri), LiveMap(g.getTRI()), Empty(), NoRegs(g.getTRI()),
- Trace(false) {}
+ : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
+ MDF(g.getDF()), LiveMap(g.getPRI()), Empty(),
+ NoRegs(g.getPRI()), Trace(false) {}
NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- bool FullChain, const RegisterAggr &DefRRs);
+ bool TopShadows, bool FullChain, const RegisterAggr &DefRRs);
NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false, NoRegs);
+ return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false,
+ false, NoRegs);
}
NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefRR, RefA, false, NoRegs);
+ return getAllReachingDefs(RefRR, RefA, false, false, NoRegs);
}
- NodeSet getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs);
NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
const RegisterAggr &DefRRs);
NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
return getAllReachedUses(RefRR, DefA, NoRegs);
}
+ std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs);
+
+ NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode*> IA);
+
LiveMapType &getLiveMap() { return LiveMap; }
const LiveMapType &getLiveMap() const { return LiveMap; }
const RefMap &getRealUses(NodeId P) const {
@@ -87,9 +92,9 @@ namespace rdf {
private:
const DataFlowGraph &DFG;
const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo &PRI;
const MachineDominatorTree &MDT;
const MachineDominanceFrontier &MDF;
- MachineRegisterInfo &MRI;
LiveMapType LiveMap;
const RefMap Empty;
const RegisterAggr NoRegs;
@@ -121,12 +126,13 @@ namespace rdf {
// the dominator tree), create a map: block -> set of uses live on exit.
std::map<MachineBasicBlock*,RefMap> PhiLOX;
- bool isRestrictedToRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- RegisterRef RR) const;
- RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const;
MachineBasicBlock *getBlockWithRef(NodeId RN) const;
void traverse(MachineBasicBlock *B, RefMap &LiveIn);
void emptify(RefMap &M);
+
+ std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs,
+ unsigned Nest, unsigned MaxNest);
};
} // namespace rdf
} // namespace llvm
diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp
new file mode 100644
index 000000000000..5c5496a548af
--- /dev/null
+++ b/lib/Target/Hexagon/RDFRegisters.cpp
@@ -0,0 +1,368 @@
+//===--- RDFRegisters.cpp ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RDFRegisters.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+using namespace rdf;
+
+PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
+ const MachineFunction &mf)
+ : TRI(tri) {
+ RegInfos.resize(TRI.getNumRegs());
+
+ BitVector BadRC(TRI.getNumRegs());
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ for (MCPhysReg R : *RC) {
+ RegInfo &RI = RegInfos[R];
+ if (RI.RegClass != nullptr && !BadRC[R]) {
+ if (RC->LaneMask != RI.RegClass->LaneMask) {
+ BadRC.set(R);
+ RI.RegClass = nullptr;
+ }
+ } else
+ RI.RegClass = RC;
+ }
+ }
+
+ UnitInfos.resize(TRI.getNumRegUnits());
+
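+  // For each register unit, record a root register containing it and the
+  // lane mask of the unit within that register.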
+ for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
+ if (UnitInfos[U].Reg != 0)
+ continue;
+ MCRegUnitRootIterator R(U, &TRI);
+ assert(R.isValid());
+ RegisterId F = *R;
+ ++R;
+ if (R.isValid()) {
+ UnitInfos[U].Mask = LaneBitmask::getAll();
+ UnitInfos[U].Reg = F;
+ } else {
+ for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) {
+ std::pair<uint32_t,LaneBitmask> P = *I;
+ UnitInfo &UI = UnitInfos[P.first];
+ UI.Reg = F;
+ if (P.second.any()) {
+ UI.Mask = P.second;
+ } else {
+ if (const TargetRegisterClass *RC = RegInfos[F].RegClass)
+ UI.Mask = RC->LaneMask;
+ else
+ UI.Mask = LaneBitmask::getAll();
+ }
+ }
+ }
+ }
+
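+
+  // Collect all register masks used in this function, so that each mask can
+  // be referred to by a register-mask id.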
+ for (const uint32_t *RM : TRI.getRegMasks())
+ RegMasks.insert(RM);
+ for (const MachineBasicBlock &B : mf)
+ for (const MachineInstr &In : B)
+ for (const MachineOperand &Op : In.operands())
+ if (Op.isRegMask())
+ RegMasks.insert(Op.getRegMask());
+}
+
+RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
+ return RR;
+}
+
+std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
+ // Do not include RR in the alias set.
+ std::set<RegisterId> AS;
+ assert(isRegMaskId(Reg) || TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (isRegMaskId(Reg)) {
+ // XXX SLOW
+ const uint32_t *MB = getRegMaskBits(Reg);
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (MB[i/32] & (1u << (i%32)))
+ continue;
+ AS.insert(i);
+ }
+ for (const uint32_t *RM : RegMasks) {
+ RegisterId MI = getRegMaskId(RM);
+ if (MI != Reg && aliasMM(RegisterRef(Reg), RegisterRef(MI)))
+ AS.insert(MI);
+ }
+ return AS;
+ }
+
+ for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
+ AS.insert(*AI);
+ for (const uint32_t *RM : RegMasks) {
+ RegisterId MI = getRegMaskId(RM);
+ if (aliasRM(RegisterRef(Reg), RegisterRef(MI)))
+ AS.insert(MI);
+ }
+ return AS;
+}
+
+bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg));
+ assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg));
+
+ MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
+ MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
+ // Reg units are returned in the numerical order.
+ while (UMA.isValid() && UMB.isValid()) {
+ // Skip units that are masked off in RA.
+ std::pair<RegisterId,LaneBitmask> PA = *UMA;
+ if (PA.second.any() && (PA.second & RA.Mask).none()) {
+ ++UMA;
+ continue;
+ }
+ // Skip units that are masked off in RB.
+ std::pair<RegisterId,LaneBitmask> PB = *UMB;
+ if (PB.second.any() && (PB.second & RB.Mask).none()) {
+ ++UMB;
+ continue;
+ }
+
+ if (PA.first == PB.first)
+ return true;
+ if (PA.first < PB.first)
+ ++UMA;
+ else if (PB.first < PA.first)
+ ++UMB;
+ }
+ return false;
+}
+
+bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
+ const uint32_t *MB = getRegMaskBits(RM.Reg);
+ bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32));
+ // If the lane mask information is "full", e.g. when the given lane mask
+ // is a superset of the lane mask from the register class, check the regmask
+ // bit directly.
+ if (RR.Mask == LaneBitmask::getAll())
+ return !Preserved;
+ const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass;
+ if (RC != nullptr && (RR.Mask & RC->LaneMask) == RC->LaneMask)
+ return !Preserved;
+
+ // Otherwise, check all subregisters whose lane mask overlaps the given
+ // mask. For each such register, if it is preserved by the regmask, then
+ // clear the corresponding bits in the given mask. If at the end, all
+ // bits have been cleared, the register does not alias the regmask (i.e.
+  // it is preserved by it).
+ LaneBitmask M = RR.Mask;
+ for (MCSubRegIndexIterator SI(RR.Reg, &TRI); SI.isValid(); ++SI) {
+ LaneBitmask SM = TRI.getSubRegIndexLaneMask(SI.getSubRegIndex());
+ if ((SM & RR.Mask).none())
+ continue;
+ unsigned SR = SI.getSubReg();
+ if (!(MB[SR/32] & (1u << (SR%32))))
+ continue;
+ // The subregister SR is preserved.
+ M &= ~SM;
+ if (M.none())
+ return false;
+ }
+
+ return true;
+}
+
+bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const {
+ assert(isRegMaskId(RM.Reg) && isRegMaskId(RN.Reg));
+ unsigned NumRegs = TRI.getNumRegs();
+ const uint32_t *BM = getRegMaskBits(RM.Reg);
+ const uint32_t *BN = getRegMaskBits(RN.Reg);
+
+ for (unsigned w = 0, nw = NumRegs/32; w != nw; ++w) {
+ // Intersect the negations of both words. Disregard reg=0,
+ // i.e. 0th bit in the 0th word.
+ uint32_t C = ~BM[w] & ~BN[w];
+ if (w == 0)
+ C &= ~1;
+ if (C)
+ return true;
+ }
+
+ // Check the remaining registers in the last word.
+ unsigned TailRegs = NumRegs % 32;
+ if (TailRegs == 0)
+ return false;
+ unsigned TW = NumRegs / 32;
+ uint32_t TailMask = (1u << TailRegs) - 1;
+ if (~BM[TW] & ~BN[TW] & TailMask)
+ return true;
+
+ return false;
+}
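The two mask-based alias checks above lean on LLVM's regmask convention: a set bit in a regmask means the corresponding register is preserved across the call, and a cleared bit means it is clobbered. A minimal sketch of that convention (the helper below is illustrative and not part of the patch):

#include <cstdint>

static bool isPreservedByRegMask(const uint32_t *RegMask, unsigned PhysReg) {
  // Set bit: the call preserves PhysReg. Cleared bit: PhysReg is clobbered.
  return (RegMask[PhysReg / 32] & (1u << (PhysReg % 32))) != 0;
}

aliasRM asks whether any live lane of RR falls outside the preserved set, and aliasMM reports an alias as soon as some register other than register 0 is clobbered by both masks, which is what the ~BM[w] & ~BN[w] intersection computes.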
+
+
+bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ // XXX SLOW
+ const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
+ for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
+ if (MB[i/32] & (1u << (i%32)))
+ continue;
+ if (hasAliasOf(RegisterRef(i, LaneBitmask::getAll())))
+ return true;
+ }
+ return false;
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t,LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ if (Units.test(P.first))
+ return true;
+ }
+ return false;
+}
+
+bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ // XXX SLOW
+ const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
+ for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
+ if (MB[i/32] & (1u << (i%32)))
+ continue;
+ if (!hasCoverOf(RegisterRef(i, LaneBitmask::getAll())))
+ return false;
+ }
+ return true;
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t,LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ if (!Units.test(P.first))
+ return false;
+ }
+ return true;
+}
+
+RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ BitVector PU(PRI.getTRI().getNumRegUnits()); // Preserved units.
+ const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
+ for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
+ if (!(MB[i/32] & (1u << (i%32))))
+ continue;
+ for (MCRegUnitIterator U(i, &PRI.getTRI()); U.isValid(); ++U)
+ PU.set(*U);
+ }
+ Units |= PU.flip();
+ return *this;
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t,LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ Units.set(P.first);
+ }
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) {
+ Units |= RG.Units;
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::intersect(RegisterRef RR) {
+ return intersect(RegisterAggr(PRI).insert(RR));
+}
+
+RegisterAggr &RegisterAggr::intersect(const RegisterAggr &RG) {
+ Units &= RG.Units;
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::clear(RegisterRef RR) {
+ return clear(RegisterAggr(PRI).insert(RR));
+}
+
+RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) {
+ Units.reset(RG.Units);
+ return *this;
+}
+
+RegisterRef RegisterAggr::intersectWith(RegisterRef RR) const {
+ RegisterAggr T(PRI);
+ T.insert(RR).intersect(*this);
+ if (T.empty())
+ return RegisterRef();
+ RegisterRef NR = T.makeRegRef();
+ assert(NR);
+ return NR;
+}
+
+RegisterRef RegisterAggr::clearIn(RegisterRef RR) const {
+ return RegisterAggr(PRI).insert(RR).clear(*this).makeRegRef();
+}
+
+RegisterRef RegisterAggr::makeRegRef() const {
+ int U = Units.find_first();
+ if (U < 0)
+ return RegisterRef();
+
+ auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) {
+ for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R)
+ for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S)
+ Regs.set(*S);
+ };
+
+ // Find the set of all registers that are aliased to all the units
+ // in this aggregate.
+
+ // Get all the registers aliased to the first unit in the bit vector.
+ BitVector Regs(PRI.getTRI().getNumRegs());
+ AliasedRegs(U, Regs);
+ U = Units.find_next(U);
+
+ // For each other unit, intersect it with the set of all registers
+  // aliased to that unit.
+ while (U >= 0) {
+ BitVector AR(PRI.getTRI().getNumRegs());
+ AliasedRegs(U, AR);
+ Regs &= AR;
+ U = Units.find_next(U);
+ }
+
+ // If there is at least one register remaining, pick the first one,
+ // and consolidate the masks of all of its units contained in this
+ // aggregate.
+
+ int F = Regs.find_first();
+ if (F <= 0)
+ return RegisterRef();
+
+ LaneBitmask M;
+ for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) {
+ std::pair<uint32_t,LaneBitmask> P = *I;
+ if (Units.test(P.first))
+ M |= P.second.none() ? LaneBitmask::getAll() : P.second;
+ }
+ return RegisterRef(F, M);
+}
+
+void RegisterAggr::print(raw_ostream &OS) const {
+ OS << '{';
+ for (int U = Units.find_first(); U >= 0; U = Units.find_next(U))
+ OS << ' ' << PrintRegUnit(U, &PRI.getTRI());
+ OS << " }";
+}
+
+RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
+ bool End)
+ : Owner(&RG) {
+ for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) {
+ RegisterRef R = RG.PRI.getRefForUnit(U);
+ Masks[R.Reg] |= R.Mask;
+ }
+ Pos = End ? Masks.end() : Masks.begin();
+ Index = End ? Masks.size() : 0;
+}
+
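Taken together, PhysicalRegisterInfo and RegisterAggr reduce every register reference to a set of register units, so aliasing and covering queries become plain set operations on a BitVector. A minimal usage sketch, assuming a PhysicalRegisterInfo has already been built for the current function (the helper name and its arguments are illustrative, not part of the patch):

#include "RDFRegisters.h"

using namespace llvm;
using namespace rdf;

static void queryRefs(const PhysicalRegisterInfo &PRI,
                      RegisterRef A, RegisterRef B) {
  RegisterAggr Live(PRI);
  Live.insert(A);                             // add every unit covered by A
  bool Overlap = Live.hasAliasOf(B);          // does B share a unit with A?
  bool Covers = Live.hasCoverOf(B);           // is every unit of B in A?
  RegisterRef Common = Live.intersectWith(B); // (reg, lane mask) for the
                                              // shared units, or an empty ref
  (void)Overlap; (void)Covers; (void)Common;
}

makeRegRef goes the other way: it picks the lowest-numbered register aliased to every unit in the aggregate and folds the unit masks back into a single lane mask.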
diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h
new file mode 100644
index 000000000000..4b35c85a6b62
--- /dev/null
+++ b/lib/Target/Hexagon/RDFRegisters.h
@@ -0,0 +1,209 @@
+//===--- RDFRegisters.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+namespace rdf {
+
+ typedef uint32_t RegisterId;
+
+ // Template class for a map translating uint32_t into arbitrary types.
+ // The map will act like an indexed set: upon insertion of a new object,
+ // it will automatically assign a new index to it. Index of 0 is treated
+ // as invalid and is never allocated.
+ template <typename T, unsigned N = 32>
+ struct IndexedSet {
+ IndexedSet() : Map() { Map.reserve(N); }
+
+ T get(uint32_t Idx) const {
+ // Index Idx corresponds to Map[Idx-1].
+ assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
+ return Map[Idx-1];
+ }
+
+ uint32_t insert(T Val) {
+ // Linear search.
+ auto F = llvm::find(Map, Val);
+ if (F != Map.end())
+ return F - Map.begin() + 1;
+ Map.push_back(Val);
+ return Map.size(); // Return actual_index + 1.
+ }
+
+ uint32_t find(T Val) const {
+ auto F = llvm::find(Map, Val);
+ assert(F != Map.end());
+ return F - Map.begin() + 1;
+ }
+
+ typedef typename std::vector<T>::const_iterator const_iterator;
+ const_iterator begin() const { return Map.begin(); }
+ const_iterator end() const { return Map.end(); }
+
+ private:
+ std::vector<T> Map;
+ };
+
+ struct RegisterRef {
+ RegisterId Reg = 0;
+ LaneBitmask Mask = LaneBitmask::getNone();
+
+ RegisterRef() = default;
+ explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
+ : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
+
+ operator bool() const {
+ return Reg != 0 && Mask.any();
+ }
+ bool operator== (const RegisterRef &RR) const {
+ return Reg == RR.Reg && Mask == RR.Mask;
+ }
+ bool operator!= (const RegisterRef &RR) const {
+ return !operator==(RR);
+ }
+ bool operator< (const RegisterRef &RR) const {
+ return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
+ }
+ };
+
+
+ struct PhysicalRegisterInfo {
+ PhysicalRegisterInfo(const TargetRegisterInfo &tri,
+ const MachineFunction &mf);
+
+ static bool isRegMaskId(RegisterId R) {
+ return TargetRegisterInfo::isStackSlot(R);
+ }
+ RegisterId getRegMaskId(const uint32_t *RM) const {
+ return TargetRegisterInfo::index2StackSlot(RegMasks.find(RM));
+ }
+ const uint32_t *getRegMaskBits(RegisterId R) const {
+ return RegMasks.get(TargetRegisterInfo::stackSlot2Index(R));
+ }
+ RegisterRef normalize(RegisterRef RR) const;
+
+ bool alias(RegisterRef RA, RegisterRef RB) const {
+ if (!isRegMaskId(RA.Reg))
+ return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB);
+ return !isRegMaskId(RB.Reg) ? aliasRM(RB, RA) : aliasMM(RA, RB);
+ }
+ std::set<RegisterId> getAliasSet(RegisterId Reg) const;
+
+ RegisterRef getRefForUnit(uint32_t U) const {
+ return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
+ }
+
+ const TargetRegisterInfo &getTRI() const { return TRI; }
+
+ private:
+ struct RegInfo {
+ const TargetRegisterClass *RegClass = nullptr;
+ };
+ struct UnitInfo {
+ RegisterId Reg = 0;
+ LaneBitmask Mask;
+ };
+
+ const TargetRegisterInfo &TRI;
+ std::vector<RegInfo> RegInfos;
+ std::vector<UnitInfo> UnitInfos;
+ IndexedSet<const uint32_t*> RegMasks;
+
+ bool aliasRR(RegisterRef RA, RegisterRef RB) const;
+ bool aliasRM(RegisterRef RR, RegisterRef RM) const;
+ bool aliasMM(RegisterRef RM, RegisterRef RN) const;
+ };
+
+
+ struct RegisterAggr {
+ RegisterAggr(const PhysicalRegisterInfo &pri)
+ : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
+ RegisterAggr(const RegisterAggr &RG) = default;
+
+ bool empty() const { return Units.empty(); }
+ bool hasAliasOf(RegisterRef RR) const;
+ bool hasCoverOf(RegisterRef RR) const;
+ static bool isCoverOf(RegisterRef RA, RegisterRef RB,
+ const PhysicalRegisterInfo &PRI) {
+ return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
+ }
+
+ RegisterAggr &insert(RegisterRef RR);
+ RegisterAggr &insert(const RegisterAggr &RG);
+ RegisterAggr &intersect(RegisterRef RR);
+ RegisterAggr &intersect(const RegisterAggr &RG);
+ RegisterAggr &clear(RegisterRef RR);
+ RegisterAggr &clear(const RegisterAggr &RG);
+
+ RegisterRef intersectWith(RegisterRef RR) const;
+ RegisterRef clearIn(RegisterRef RR) const;
+ RegisterRef makeRegRef() const;
+
+ void print(raw_ostream &OS) const;
+
+ struct rr_iterator {
+ typedef std::map<RegisterId,LaneBitmask> MapType;
+ private:
+ MapType Masks;
+ MapType::iterator Pos;
+ unsigned Index;
+ const RegisterAggr *Owner;
+ public:
+ rr_iterator(const RegisterAggr &RG, bool End);
+ RegisterRef operator*() const {
+ return RegisterRef(Pos->first, Pos->second);
+ }
+ rr_iterator &operator++() {
+ ++Pos;
+ ++Index;
+ return *this;
+ }
+ bool operator==(const rr_iterator &I) const {
+ assert(Owner == I.Owner);
+ return Index == I.Index;
+ }
+ bool operator!=(const rr_iterator &I) const {
+ return !(*this == I);
+ }
+ };
+
+ rr_iterator rr_begin() const {
+ return rr_iterator(*this, false);
+ }
+ rr_iterator rr_end() const {
+ return rr_iterator(*this, true);
+ }
+
+ private:
+ BitVector Units;
+ const PhysicalRegisterInfo &PRI;
+ };
+
+
+ // Optionally print the lane mask, if it is not ~0.
+ struct PrintLaneMaskOpt {
+ PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
+ LaneBitmask Mask;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
+
+} // namespace rdf
+} // namespace llvm
+
+#endif
+
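One convention worth noting when reading getRegMaskId and getRegMaskBits above: IndexedSet hands out 1-based indices (0 is reserved as invalid) and returns the existing index when the same value is inserted again, which is what lets regmask ids be encoded in the stack-slot number space. A small standalone sketch of that behaviour (illustrative only):

#include "RDFRegisters.h"
#include <cassert>
#include <cstdint>

void indexedSetExample() {
  llvm::rdf::IndexedSet<int> S;
  uint32_t A = S.insert(10);   // first value gets index 1; 0 is never used
  uint32_t B = S.insert(20);   // second distinct value gets index 2
  uint32_t C = S.insert(10);   // duplicate insert returns the existing index
  assert(A == 1 && B == 2 && C == A);
  assert(S.get(B) == 20);      // get() maps an index back to its value
}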
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 57ead973b56e..1d6c07974beb 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -1096,7 +1096,7 @@ StringRef LanaiAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
return Mnemonic;
}
-bool IsMemoryAssignmentError(const OperandVector &Operands) {
+static bool IsMemoryAssignmentError(const OperandVector &Operands) {
// Detects if a memory operation has an erroneous base register modification.
// Memory operations are detected by matching the types of operands.
//
diff --git a/lib/Target/Lanai/InstPrinter/CMakeLists.txt b/lib/Target/Lanai/InstPrinter/CMakeLists.txt
index 6badb1c98a6d..7f76b895e6ec 100644
--- a/lib/Target/Lanai/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Lanai/InstPrinter/CMakeLists.txt
@@ -1,3 +1,3 @@
-add_llvm_library(LLVMLanaiInstPrinter
+add_llvm_library(LLVMLanaiAsmPrinter
LanaiInstPrinter.cpp
)
diff --git a/lib/Target/Lanai/InstPrinter/LLVMBuild.txt b/lib/Target/Lanai/InstPrinter/LLVMBuild.txt
index eed9a587d14a..8d9768c6017b 100644
--- a/lib/Target/Lanai/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Lanai/InstPrinter/LLVMBuild.txt
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = LanaiInstPrinter
+name = LanaiAsmPrinter
parent = Lanai
required_libraries = MC Support
add_to_library_groups = Lanai
diff --git a/lib/Target/Lanai/LLVMBuild.txt b/lib/Target/Lanai/LLVMBuild.txt
index 798fc351bd70..cb91ffb61a98 100644
--- a/lib/Target/Lanai/LLVMBuild.txt
+++ b/lib/Target/Lanai/LLVMBuild.txt
@@ -36,7 +36,7 @@ required_libraries =
LanaiAsmParser
LanaiDesc
LanaiInfo
- LanaiInstPrinter
+ LanaiAsmPrinter
MC
SelectionDAG
Support
diff --git a/lib/Target/Lanai/LanaiInstrInfo.cpp b/lib/Target/Lanai/LanaiInstrInfo.cpp
index fcd5da876b15..a7c9a7a7f280 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -518,7 +518,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI,
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
- NewMI.addOperand(DefMI->getOperand(i));
+ NewMI.add(DefMI->getOperand(i));
unsigned CondCode = MI.getOperand(3).getImm();
if (Invert)
@@ -531,7 +531,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI,
// register operand tied to the first def. The tie makes the register
// allocator ensure the FalseReg is allocated the same register as operand 0.
FalseReg.setImplicit();
- NewMI.addOperand(FalseReg);
+ NewMI.add(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// Update SeenMIs set: register newly created MI and erase removed DefMI.
diff --git a/lib/Target/Lanai/LanaiMCInstLower.cpp b/lib/Target/Lanai/LanaiMCInstLower.cpp
index 39c633578d43..90ede6566acf 100644
--- a/lib/Target/Lanai/LanaiMCInstLower.cpp
+++ b/lib/Target/Lanai/LanaiMCInstLower.cpp
@@ -130,7 +130,7 @@ void LanaiMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
break;
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
}
diff --git a/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt b/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
index 8070dbabb1a6..05e52ccc18d7 100644
--- a/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = LanaiDesc
parent = Lanai
-required_libraries = LanaiInfo LanaiInstPrinter MC MCDisassembler Support
+required_libraries = LanaiInfo LanaiAsmPrinter MC MCDisassembler Support
add_to_library_groups = Lanai
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index a04fe8112fb9..0ef1401ef531 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -50,7 +50,7 @@ public:
: MCAsmBackend(), OSType(OST) {}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -90,7 +90,7 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void LanaiAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned /*DataSize*/, uint64_t Value,
- bool /*IsPCRel*/) const {
+ bool /*IsPCRel*/, MCContext & /*Ctx*/) const {
MCFixupKind Kind = Fixup.getKind();
Value = adjustFixupValue(static_cast<unsigned>(Kind), Value);
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index f5b5335bb989..10254677a5ad 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -89,7 +89,7 @@ public:
} // end anonymous namespace
-Lanai::Fixups FixupKind(const MCExpr *Expr) {
+static Lanai::Fixups FixupKind(const MCExpr *Expr) {
if (isa<MCSymbolRefExpr>(Expr))
return Lanai::FIXUP_LANAI_21;
if (const LanaiMCExpr *McExpr = dyn_cast<LanaiMCExpr>(Expr)) {
@@ -134,8 +134,8 @@ unsigned LanaiMCCodeEmitter::getMachineOpValue(
}
// Helper function to adjust P and Q bits on load and store instructions.
-unsigned adjustPqBits(const MCInst &Inst, unsigned Value, unsigned PBitShift,
- unsigned QBitShift) {
+static unsigned adjustPqBits(const MCInst &Inst, unsigned Value,
+ unsigned PBitShift, unsigned QBitShift) {
const MCOperand AluOp = Inst.getOperand(3);
unsigned AluCode = AluOp.getImm();
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 5fd6b6305f68..424b5ae418f7 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -194,8 +194,8 @@ bool MSP430BSel::expandBranches(OffsetVector &BlockOffsets) {
// Jump over the long branch on the opposite condition
TII->reverseBranchCondition(Cond);
MI = BuildMI(*MBB, MI, dl, TII->get(MSP430::JCC))
- .addMBB(NextMBB)
- .addOperand(Cond[0]);
+ .addMBB(NextMBB)
+ .add(Cond[0]);
InstrSizeDiff += TII->getInstSizeInBytes(*MI);
++MI;
}
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index b38f5781c84a..0434f8abfbf4 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -13,11 +13,11 @@
// MSP430 Return Value Calling Convention
//===----------------------------------------------------------------------===//
def RetCC_MSP430 : CallingConv<[
- // i8 are returned in registers R15B, R14B, R13B, R12B
- CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+ // i8 are returned in registers R12B, R13B, R14B, R15B
+ CCIfType<[i8], CCAssignToReg<[R12B, R13B, R14B, R15B]>>,
- // i16 are returned in registers R15, R14, R13, R12
- CCIfType<[i16], CCAssignToReg<[R15, R14, R13, R12]>>
+ // i16 are returned in registers R12, R13, R14, R15
+ CCIfType<[i16], CCAssignToReg<[R12, R13, R14, R15]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 6e481b68e038..cd58eda5d924 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -61,7 +61,8 @@ namespace {
return GV != nullptr || CP != nullptr || ES != nullptr || JT != -1;
}
- void dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() {
errs() << "MSP430ISelAddressMode " << this << '\n';
if (BaseType == RegBase && Base.Reg.getNode() != nullptr) {
errs() << "Base.Reg ";
@@ -83,6 +84,7 @@ namespace {
} else if (JT != -1)
errs() << " JT" << JT << " Align" << Align << '\n';
}
+#endif
};
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 73346b9ce41d..40b1dd3cc2eb 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -245,13 +245,20 @@ MSP430TargetLowering::getRegForInlineAsmConstraint(
template<typename ArgT>
static void ParseFunctionArgs(const SmallVectorImpl<ArgT> &Args,
SmallVectorImpl<unsigned> &Out) {
- unsigned CurrentArgIndex = ~0U;
- for (unsigned i = 0, e = Args.size(); i != e; i++) {
- if (CurrentArgIndex == Args[i].OrigArgIndex) {
- Out.back()++;
+ unsigned CurrentArgIndex;
+
+ if (Args.empty())
+ return;
+
+ CurrentArgIndex = Args[0].OrigArgIndex;
+ Out.push_back(0);
+
+ for (auto &Arg : Args) {
+ if (CurrentArgIndex == Arg.OrigArgIndex) {
+ Out.back() += 1;
} else {
Out.push_back(1);
- CurrentArgIndex++;
+ CurrentArgIndex = Arg.OrigArgIndex;
}
}
}
@@ -275,7 +282,7 @@ static void AnalyzeArguments(CCState &State,
SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<ArgT> &Args) {
static const MCPhysReg RegList[] = {
- MSP430::R15, MSP430::R14, MSP430::R13, MSP430::R12
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
};
static const unsigned NbRegs = array_lengthof(RegList);
@@ -288,7 +295,7 @@ static void AnalyzeArguments(CCState &State,
ParseFunctionArgs(Args, ArgsParts);
unsigned RegsLeft = NbRegs;
- bool UseStack = false;
+ bool UsedStack = false;
unsigned ValNo = 0;
for (unsigned i = 0, e = ArgsParts.size(); i != e; i++) {
@@ -316,20 +323,22 @@ static void AnalyzeArguments(CCState &State,
unsigned Parts = ArgsParts[i];
- if (!UseStack && Parts <= RegsLeft) {
- unsigned FirstVal = ValNo;
+ if (!UsedStack && Parts == 2 && RegsLeft == 1) {
+ // Special case for 32-bit register split, see EABI section 3.3.3
+ unsigned Reg = State.AllocateReg(RegList);
+ State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo));
+ RegsLeft -= 1;
+
+ UsedStack = true;
+ CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State);
+ } else if (Parts <= RegsLeft) {
for (unsigned j = 0; j < Parts; j++) {
unsigned Reg = State.AllocateReg(RegList);
State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo));
RegsLeft--;
}
-
- // Reverse the order of the pieces to agree with the "big endian" format
- // required in the calling convention ABI.
- SmallVectorImpl<CCValAssign>::iterator B = ArgLocs.begin() + FirstVal;
- std::reverse(B, B + Parts);
} else {
- UseStack = true;
+ UsedStack = true;
for (unsigned j = 0; j < Parts; j++)
CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State);
}
@@ -351,10 +360,6 @@ static void AnalyzeReturnValues(CCState &State,
SmallVectorImpl<CCValAssign> &RVLocs,
const SmallVectorImpl<ArgT> &Args) {
AnalyzeRetResult(State, Args);
-
- // Reverse splitted return values to get the "big endian" format required
- // to agree with the calling convention ABI.
- std::reverse(RVLocs.begin(), RVLocs.end());
}
SDValue MSP430TargetLowering::LowerFormalArguments(
@@ -496,9 +501,33 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
}
}
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (Ins[i].Flags.isSRet()) {
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(
+ getRegClassFor(MVT::i16));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+ }
+
return Chain;
}
+bool
+MSP430TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_MSP430);
+}
+
SDValue
MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -506,6 +535,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
@@ -537,6 +568,22 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ if (MF.getFunction()->hasStructRetAttr()) {
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in entry block");
+
+ SDValue Val =
+ DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy(DAG.getDataLayout()));
+ unsigned R12 = MSP430::R12;
+
+ Chain = DAG.getCopyToReg(Chain, dl, R12, Val, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(R12, getPointerTy(DAG.getDataLayout())));
+ }
+
unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
@@ -1219,7 +1266,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
BB->end());
RemBB->transferSuccessorsAndUpdatePHIs(BB);
- // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
BB->addSuccessor(LoopBB);
BB->addSuccessor(RemBB);
LoopBB->addSuccessor(RemBB);
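The LowerCCCArguments and LowerReturn hunks above keep the hidden sret pointer in a virtual register and copy it back to R12 on return, so a caller of a struct-returning function can rely on finding the address of the result in R12 after the call. A small source-level example of the kind of function this affects (purely illustrative, not from the patch):

// A return value this large is lowered with a hidden sret pointer argument
// on MSP430; with the change above the same pointer also comes back in R12.
struct Big {
  int Data[8];
};

Big makeBig() {
  Big B = {};
  B.Data[0] = 42;
  return B;
}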
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 8864807e999e..3a729623c99a 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -158,6 +158,12 @@ namespace llvm {
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 47b0e270c5b3..e7716382b222 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -119,7 +119,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 2d937318c7e5..fcaa8a1d6c72 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -33,15 +33,23 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
explicit MSP430MachineFunctionInfo(MachineFunction &MF)
- : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0), SRetReturnReg(0) {}
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d054578deb67..d407774574be 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -7,47 +7,67 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsABIFlagsSection.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsRegisterInfo.h"
-#include "MipsTargetObjectFile.h"
#include "MipsTargetStreamer.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <memory>
+#include <string>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "mips-asm-parser"
namespace llvm {
+
class MCInstrInfo;
-}
+
+} // end namespace llvm
namespace {
+
class MipsAssemblerOptions {
public:
- MipsAssemblerOptions(const FeatureBitset &Features_) :
- ATReg(1), Reorder(true), Macro(true), Features(Features_) {}
+ MipsAssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {}
MipsAssemblerOptions(const MipsAssemblerOptions *Opts) {
ATReg = Opts->getATRegIndex();
@@ -84,12 +104,13 @@ public:
static const FeatureBitset AllArchRelatedMask;
private:
- unsigned ATReg;
- bool Reorder;
- bool Macro;
+ unsigned ATReg = 1;
+ bool Reorder = true;
+ bool Macro = true;
FeatureBitset Features;
};
-}
+
+} // end anonymous namespace
const FeatureBitset MipsAssemblerOptions::AllArchRelatedMask = {
Mips::FeatureMips1, Mips::FeatureMips2, Mips::FeatureMips3,
@@ -103,6 +124,7 @@ const FeatureBitset MipsAssemblerOptions::AllArchRelatedMask = {
};
namespace {
+
class MipsAsmParser : public MCTargetAsmParser {
MipsTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -147,6 +169,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseBracketSuffix(StringRef Name, OperandVector &Operands);
+ bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);
+
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -252,6 +276,18 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
+ bool expandMulImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandMulO(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandMulOU(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI, bool IsLoad);
@@ -339,6 +375,8 @@ class MipsAsmParser : public MCTargetAsmParser {
/// This should be used in pseudo-instruction expansions which need AT.
unsigned getATReg(SMLoc Loc);
+ bool canUseATReg();
+
bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
@@ -466,9 +504,11 @@ public:
bool isGP64bit() const {
return getSTI().getFeatureBits()[Mips::FeatureGP64Bit];
}
+
bool isFP64bit() const {
return getSTI().getFeatureBits()[Mips::FeatureFP64Bit];
}
+
const MipsABIInfo &getABI() const { return ABI; }
bool isABI_N32() const { return ABI.IsN32(); }
bool isABI_N64() const { return ABI.IsN64(); }
@@ -484,48 +524,63 @@ public:
bool inMicroMipsMode() const {
return getSTI().getFeatureBits()[Mips::FeatureMicroMips];
}
+
bool hasMips1() const {
return getSTI().getFeatureBits()[Mips::FeatureMips1];
}
+
bool hasMips2() const {
return getSTI().getFeatureBits()[Mips::FeatureMips2];
}
+
bool hasMips3() const {
return getSTI().getFeatureBits()[Mips::FeatureMips3];
}
+
bool hasMips4() const {
return getSTI().getFeatureBits()[Mips::FeatureMips4];
}
+
bool hasMips5() const {
return getSTI().getFeatureBits()[Mips::FeatureMips5];
}
+
bool hasMips32() const {
return getSTI().getFeatureBits()[Mips::FeatureMips32];
}
+
bool hasMips64() const {
return getSTI().getFeatureBits()[Mips::FeatureMips64];
}
+
bool hasMips32r2() const {
return getSTI().getFeatureBits()[Mips::FeatureMips32r2];
}
+
bool hasMips64r2() const {
return getSTI().getFeatureBits()[Mips::FeatureMips64r2];
}
+
bool hasMips32r3() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips32r3]);
}
+
bool hasMips64r3() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips64r3]);
}
+
bool hasMips32r5() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips32r5]);
}
+
bool hasMips64r5() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips64r5]);
}
+
bool hasMips32r6() const {
return getSTI().getFeatureBits()[Mips::FeatureMips32r6];
}
+
bool hasMips64r6() const {
return getSTI().getFeatureBits()[Mips::FeatureMips64r6];
}
@@ -533,15 +588,19 @@ public:
bool hasDSP() const {
return getSTI().getFeatureBits()[Mips::FeatureDSP];
}
+
bool hasDSPR2() const {
return getSTI().getFeatureBits()[Mips::FeatureDSPR2];
}
+
bool hasDSPR3() const {
return getSTI().getFeatureBits()[Mips::FeatureDSPR3];
}
+
bool hasMSA() const {
return getSTI().getFeatureBits()[Mips::FeatureMSA];
}
+
bool hasCnMips() const {
return (getSTI().getFeatureBits()[Mips::FeatureCnMips]);
}
@@ -627,9 +686,6 @@ public:
}
}
};
-}
-
-namespace {
/// MipsOperand - Instances of this class represent a parsed Mips machine
/// instruction.
@@ -671,6 +727,22 @@ public:
MipsOperand(KindTy K, MipsAsmParser &Parser)
: MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {}
+ ~MipsOperand() override {
+ switch (Kind) {
+ case k_Immediate:
+ break;
+ case k_Memory:
+ delete Mem.Base;
+ break;
+ case k_RegList:
+ delete RegList.List;
+ case k_RegisterIndex:
+ case k_Token:
+ case k_RegPair:
+ break;
+ }
+ }
+
private:
/// For diagnostics, and checking the assembler temporary
MipsAsmParser &AsmParser;
@@ -716,7 +788,7 @@ private:
const MCRegisterInfo *RegInfo,
SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_RegisterIndex, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_RegisterIndex, Parser);
Op->RegIdx.Index = Index;
Op->RegIdx.RegInfo = RegInfo;
Op->RegIdx.Kind = RegKind;
@@ -896,6 +968,16 @@ public:
/// Render the operand to an MCInst as a GPR32
/// Asserts if the wrong number of operands are requested, or the operand
/// is not a k_RegisterIndex compatible with RegKind_GPR
+ void addGPR32ZeroAsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
+ }
+
+ void addGPR32NonZeroAsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
+ }
+
void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
@@ -1104,45 +1186,58 @@ public:
// $0/$zero here so that MCK_ZERO works correctly.
return isGPRAsmReg() && RegIdx.Index == 0;
}
+
bool isRegIdx() const { return Kind == k_RegisterIndex; }
bool isImm() const override { return Kind == k_Immediate; }
+
bool isConstantImm() const {
int64_t Res;
return isImm() && getImm()->evaluateAsAbsolute(Res);
}
+
bool isConstantImmz() const {
return isConstantImm() && getConstantImm() == 0;
}
+
template <unsigned Bits, int Offset = 0> bool isConstantUImm() const {
return isConstantImm() && isUInt<Bits>(getConstantImm() - Offset);
}
+
template <unsigned Bits> bool isSImm() const {
return isConstantImm() ? isInt<Bits>(getConstantImm()) : isImm();
}
+
template <unsigned Bits> bool isUImm() const {
return isConstantImm() ? isUInt<Bits>(getConstantImm()) : isImm();
}
+
template <unsigned Bits> bool isAnyImm() const {
return isConstantImm() ? (isInt<Bits>(getConstantImm()) ||
isUInt<Bits>(getConstantImm()))
: isImm();
}
+
template <unsigned Bits, int Offset = 0> bool isConstantSImm() const {
return isConstantImm() && isInt<Bits>(getConstantImm() - Offset);
}
+
template <unsigned Bottom, unsigned Top> bool isConstantUImmRange() const {
return isConstantImm() && getConstantImm() >= Bottom &&
getConstantImm() <= Top;
}
+
bool isToken() const override {
// Note: It's not possible to pretend that other operand kinds are tokens.
// The matcher emitter checks tokens first.
return Kind == k_Token;
}
+
bool isMem() const override { return Kind == k_Memory; }
+
bool isConstantMemOff() const {
return isMem() && isa<MCConstantExpr>(getMemOff());
}
+
// Allow relocation operators.
// FIXME: This predicate and others need to look through binary expressions
// and determine whether a Value is a constant or not.
@@ -1160,28 +1255,34 @@ public:
bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr);
return IsReloc && isShiftedInt<Bits, ShiftAmount>(Res.getConstant());
}
+
bool isMemWithGRPMM16Base() const {
return isMem() && getMemBase()->isMM16AsmReg();
}
+
template <unsigned Bits> bool isMemWithUimmOffsetSP() const {
return isMem() && isConstantMemOff() && isUInt<Bits>(getConstantMemOff())
&& getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == Mips::SP);
}
+
template <unsigned Bits> bool isMemWithUimmWordAlignedOffsetSP() const {
return isMem() && isConstantMemOff() && isUInt<Bits>(getConstantMemOff())
&& (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx()
&& (getMemBase()->getGPR32Reg() == Mips::SP);
}
+
template <unsigned Bits> bool isMemWithSimmWordAlignedOffsetGP() const {
return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff())
&& (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx()
&& (getMemBase()->getGPR32Reg() == Mips::GP);
}
+
template <unsigned Bits, unsigned ShiftLeftAmount>
bool isScaledUImm() const {
return isConstantImm() &&
isShiftedUInt<Bits, ShiftLeftAmount>(getConstantImm());
}
+
template <unsigned Bits, unsigned ShiftLeftAmount>
bool isScaledSImm() const {
if (isConstantImm() && isShiftedInt<Bits, ShiftLeftAmount>(getConstantImm()))
@@ -1193,6 +1294,7 @@ public:
bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr);
return Success && isShiftedInt<Bits, ShiftLeftAmount>(Res.getConstant());
}
+
bool isRegList16() const {
if (!isRegList())
return false;
@@ -1217,14 +1319,18 @@ public:
return true;
}
+
bool isInvNum() const { return Kind == k_Immediate; }
+
bool isLSAImm() const {
if (!isConstantImm())
return false;
int64_t Val = getConstantImm();
return 1 <= Val && Val <= 4;
}
+
bool isRegList() const { return Kind == k_RegList; }
+
bool isMovePRegPair() const {
if (Kind != k_RegList || RegList.List->size() != 2)
return false;
@@ -1257,6 +1363,7 @@ public:
assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
+
bool isRegPair() const {
return Kind == k_RegPair && RegIdx.Index <= 30;
}
@@ -1310,7 +1417,7 @@ public:
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_Token, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_Token, Parser);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -1385,7 +1492,7 @@ public:
static std::unique_ptr<MipsOperand>
CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_Immediate, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_Immediate, Parser);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1395,7 +1502,7 @@ public:
static std::unique_ptr<MipsOperand>
CreateMem(std::unique_ptr<MipsOperand> Base, const MCExpr *Off, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_Memory, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_Memory, Parser);
Op->Mem.Base = Base.release();
Op->Mem.Off = Off;
Op->StartLoc = S;
@@ -1406,9 +1513,9 @@ public:
static std::unique_ptr<MipsOperand>
CreateRegList(SmallVectorImpl<unsigned> &Regs, SMLoc StartLoc, SMLoc EndLoc,
MipsAsmParser &Parser) {
- assert (Regs.size() > 0 && "Empty list not allowed");
+ assert(Regs.size() > 0 && "Empty list not allowed");
- auto Op = make_unique<MipsOperand>(k_RegList, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_RegList, Parser);
Op->RegList.List = new SmallVector<unsigned, 10>(Regs.begin(), Regs.end());
Op->StartLoc = StartLoc;
Op->EndLoc = EndLoc;
@@ -1418,7 +1525,7 @@ public:
static std::unique_ptr<MipsOperand> CreateRegPair(const MipsOperand &MOP,
SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_RegPair, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_RegPair, Parser);
Op->RegIdx.Index = MOP.RegIdx.Index;
Op->RegIdx.RegInfo = MOP.RegIdx.RegInfo;
Op->RegIdx.Kind = MOP.RegIdx.Kind;
@@ -1427,14 +1534,25 @@ public:
return Op;
}
+ bool isGPRZeroAsmReg() const {
+ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0;
+ }
+
+ bool isGPRNonZeroAsmReg() const {
+ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index > 0 &&
+ RegIdx.Index <= 31;
+ }
+
bool isGPRAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31;
}
+
bool isMM16AsmReg() const {
if (!(isRegIdx() && RegIdx.Kind))
return false;
return ((RegIdx.Index >= 2 && RegIdx.Index <= 7)
|| RegIdx.Index == 16 || RegIdx.Index == 17);
+
}
bool isMM16AsmRegZero() const {
if (!(isRegIdx() && RegIdx.Kind))
@@ -1443,42 +1561,53 @@ public:
(RegIdx.Index >= 2 && RegIdx.Index <= 7) ||
RegIdx.Index == 17);
}
+
bool isMM16AsmRegMoveP() const {
if (!(isRegIdx() && RegIdx.Kind))
return false;
return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) ||
(RegIdx.Index >= 16 && RegIdx.Index <= 20));
}
+
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
}
+
bool isHWRegsAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_HWRegs && RegIdx.Index <= 31;
}
+
bool isCCRAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_CCR && RegIdx.Index <= 31;
}
+
bool isFCCAsmReg() const {
if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
return false;
return RegIdx.Index <= 7;
}
+
bool isACCAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3;
}
+
bool isCOP0AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP0 && RegIdx.Index <= 31;
}
+
bool isCOP2AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP2 && RegIdx.Index <= 31;
}
+
bool isCOP3AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP3 && RegIdx.Index <= 31;
}
+
bool isMSA128AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_MSA128 && RegIdx.Index <= 31;
}
+
bool isMSACtrlAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_MSACtrl && RegIdx.Index <= 7;
}
@@ -1488,22 +1617,6 @@ public:
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const override { return EndLoc; }
- virtual ~MipsOperand() {
- switch (Kind) {
- case k_Immediate:
- break;
- case k_Memory:
- delete Mem.Base;
- break;
- case k_RegList:
- delete RegList.List;
- case k_RegisterIndex:
- case k_Token:
- case k_RegPair:
- break;
- }
- }
-
void print(raw_ostream &OS) const override {
switch (Kind) {
case k_Immediate:
@@ -1553,11 +1666,15 @@ public:
}
}
}; // class MipsOperand
-} // namespace
+
+} // end anonymous namespace
namespace llvm {
+
extern const MCInstrDesc MipsInsts[];
-}
+
+} // end namespace llvm
+
static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return MipsInsts[Opcode];
}
@@ -1785,6 +1902,61 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
+ // Warn on division by zero. We're checking here as all instructions get
+ // processed here, not just the macros that need expansion.
+ //
+  // The MIPS backend models most of the division instructions and macros as
+ // three operand instructions. The pre-R6 divide instructions however have
+ // two operands and explicitly define HI/LO as part of the instruction,
+ // not in the operands.
+ unsigned FirstOp = 1;
+ unsigned SecondOp = 2;
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case Mips::SDivIMacro:
+ case Mips::UDivIMacro:
+ case Mips::DSDivIMacro:
+ case Mips::DUDivIMacro:
+ if (Inst.getOperand(2).getImm() == 0) {
+ if (Inst.getOperand(1).getReg() == Mips::ZERO ||
+ Inst.getOperand(1).getReg() == Mips::ZERO_64)
+ Warning(IDLoc, "dividing zero by zero");
+ else
+ Warning(IDLoc, "division by zero");
+ }
+ break;
+ case Mips::DSDIV:
+ case Mips::SDIV:
+ case Mips::UDIV:
+ case Mips::DUDIV:
+ case Mips::UDIV_MM:
+ case Mips::SDIV_MM:
+ FirstOp = 0;
+ SecondOp = 1;
+ case Mips::SDivMacro:
+ case Mips::DSDivMacro:
+ case Mips::UDivMacro:
+ case Mips::DUDivMacro:
+ case Mips::DIV:
+ case Mips::DIVU:
+ case Mips::DDIV:
+ case Mips::DDIVU:
+ case Mips::DIVU_MMR6:
+ case Mips::DDIVU_MM64R6:
+ case Mips::DIV_MMR6:
+ case Mips::DDIV_MM64R6:
+ if (Inst.getOperand(SecondOp).getReg() == Mips::ZERO ||
+ Inst.getOperand(SecondOp).getReg() == Mips::ZERO_64) {
+ if (Inst.getOperand(FirstOp).getReg() == Mips::ZERO ||
+ Inst.getOperand(FirstOp).getReg() == Mips::ZERO_64)
+ Warning(IDLoc, "dividing zero by zero");
+ else
+ Warning(IDLoc, "division by zero");
+ }
+ break;
+ }
+
// For PIC code convert unconditional jump to unconditional branch.
if ((Inst.getOpcode() == Mips::J || Inst.getOpcode() == Mips::J_MM) &&
inPicMode()) {
@@ -2135,6 +2307,8 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandJalWithRegs(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::BneImm:
case Mips::BeqImm:
+ case Mips::BEQLImmMacro:
+ case Mips::BNELImmMacro:
return expandBranchImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::BLT:
case Mips::BLE:
@@ -2170,15 +2344,19 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::BGTULImmMacro:
return expandCondBranches(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SDivMacro:
+ case Mips::SDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, false, true) ? MER_Fail
: MER_Success;
case Mips::DSDivMacro:
+ case Mips::DSDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, true, true) ? MER_Fail
: MER_Success;
case Mips::UDivMacro:
+ case Mips::UDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, false, false) ? MER_Fail
: MER_Success;
case Mips::DUDivMacro:
+ case Mips::DUDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, true, false) ? MER_Fail
: MER_Success;
case Mips::PseudoTRUNC_W_S:
@@ -2200,11 +2378,24 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::Usw:
return expandUxw(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::NORImm:
+ case Mips::NORImm64:
+ return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SLTImm64:
+ if (isInt<16>(Inst.getOperand(2).getImm())) {
+ Inst.setOpcode(Mips::SLTi64);
+ return MER_NotAMacro;
+ }
+ return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SLTUImm64:
+ if (isInt<16>(Inst.getOperand(2).getImm())) {
+ Inst.setOpcode(Mips::SLTiu64);
+ return MER_NotAMacro;
+ }
return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
- case Mips::ADDi:
- case Mips::ADDiu:
- case Mips::SLTi:
- case Mips::SLTiu:
+ case Mips::ADDi: case Mips::ADDi_MM:
+ case Mips::ADDiu: case Mips::ADDiu_MM:
+ case Mips::SLTi: case Mips::SLTi_MM:
+ case Mips::SLTiu: case Mips::SLTiu_MM:
if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
int64_t ImmValue = Inst.getOperand(2).getImm();
@@ -2214,9 +2405,9 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
: MER_Success;
}
return MER_NotAMacro;
- case Mips::ANDi:
- case Mips::ORi:
- case Mips::XORi:
+ case Mips::ANDi: case Mips::ANDi_MM: case Mips::ANDi64:
+ case Mips::ORi: case Mips::ORi_MM: case Mips::ORi64:
+ case Mips::XORi: case Mips::XORi_MM: case Mips::XORi64:
if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
int64_t ImmValue = Inst.getOperand(2).getImm();
@@ -2240,6 +2431,17 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandDRotationImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::ABSMacro:
return expandAbs(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::MULImmMacro:
+ case Mips::DMULImmMacro:
+ return expandMulImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::MULOMacro:
+ case Mips::DMULOMacro:
+ return expandMulO(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::MULOUMacro:
+ case Mips::DMULOUMacro:
+ return expandMulOU(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::DMULMacro:
+ return expandDMULMacro(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::LDMacro:
case Mips::SDMacro:
return expandLoadStoreDMacro(Inst, IDLoc, Out, STI,
@@ -2392,7 +2594,6 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
uint16_t Bits15To0 = ImmValue & 0xffff;
-
if (!Is32BitImm && !isInt<32>(ImmValue)) {
// Traditional behaviour seems to special case this particular value. It's
// not clear why other masks are handled differently.
@@ -2618,20 +2819,24 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
// This is the 64-bit symbol address expansion.
if (ABI.ArePtrs64bit() && isGP64bit()) {
- // We always need AT for the 64-bit expansion.
- // If it is not available we exit.
- unsigned ATReg = getATReg(IDLoc);
- if (!ATReg)
- return true;
+ // We need AT for the 64-bit expansion in the cases where the optional
+ // source register is the destination register and for the superscalar
+ // scheduled form.
+ //
+ // If it is not available we exit if the destination is the same as the
+ // source register.
const MipsMCExpr *HighestExpr =
MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, SymExpr, getContext());
const MipsMCExpr *HigherExpr =
MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, SymExpr, getContext());
- if (UseSrcReg &&
- getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg,
- SrcReg)) {
+ bool RdRegIsRsReg =
+ getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, SrcReg);
+
+ if (canUseATReg() && UseSrcReg && RdRegIsRsReg) {
+ unsigned ATReg = getATReg(IDLoc);
+
// If $rs is the same as $rd:
// (d)la $rd, sym($rd) => lui $at, %highest(sym)
// daddiu $at, $at, %higher(sym)
@@ -2653,29 +2858,65 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
TOut.emitRRR(Mips::DADDu, DstReg, ATReg, SrcReg, IDLoc, STI);
return false;
- }
+ } else if (canUseATReg() && !RdRegIsRsReg) {
+ unsigned ATReg = getATReg(IDLoc);
- // Otherwise, if the $rs is different from $rd or if $rs isn't specified:
- // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
- // lui $at, %hi(sym)
- // daddiu $rd, $rd, %higher(sym)
- // daddiu $at, $at, %lo(sym)
- // dsll32 $rd, $rd, 0
- // daddu $rd, $rd, $at
- // (daddu $rd, $rd, $rs)
- TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
- STI);
- TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI);
- TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
- MCOperand::createExpr(HigherExpr), IDLoc, STI);
- TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr),
- IDLoc, STI);
- TOut.emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, STI);
- TOut.emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, STI);
- if (UseSrcReg)
- TOut.emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, STI);
+ // If the $rs is different from $rd or if $rs isn't specified and we
+ // have $at available:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
+ // lui $at, %hi(sym)
+ // daddiu $rd, $rd, %higher(sym)
+ // daddiu $at, $at, %lo(sym)
+ // dsll32 $rd, $rd, 0
+ // daddu $rd, $rd, $at
+ // (daddu $rd, $rd, $rs)
+ //
+ // Which is preferred for superscalar issue.
+ TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ STI);
+ TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(HigherExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr),
+ IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, STI);
+ TOut.emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, STI);
+ if (UseSrcReg)
+ TOut.emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, STI);
- return false;
+ return false;
+ } else if (!canUseATReg() && !RdRegIsRsReg) {
+ // Otherwise, synthesize the address in the destination register
+ // serially:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
+ // daddiu $rd, $rd, %higher(sym)
+ // dsll $rd, $rd, 16
+ // daddiu $rd, $rd, %hi(sym)
+ // dsll $rd, $rd, 16
+ // daddiu $rd, $rd, %lo(sym)
+ TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(HigherExpr), IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, DstReg, DstReg, 16, IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(HiExpr), IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, DstReg, DstReg, 16, IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
+ if (UseSrcReg)
+ TOut.emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, STI);
+
+ return false;
+ } else {
+ // We have a case where SrcReg == DstReg and we don't have $at
+ // available. We can't expand this case, so error out appropriately.
+ assert(SrcReg == DstReg && !canUseATReg() &&
+ "Could have expanded dla but didn't?");
+ reportParseError(IDLoc,
+ "pseudo-instruction requires $at, which is not available");
+ return true;
+ }
}
// And now, the 32-bit symbol address expansion:
@@ -2769,6 +3010,8 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
assert((MemOffsetOp.isImm() || MemOffsetOp.isExpr()) &&
"expected immediate or expression operand");
+ bool IsLikely = false;
+
unsigned OpCode = 0;
switch(Inst.getOpcode()) {
case Mips::BneImm:
@@ -2777,16 +3020,29 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::BeqImm:
OpCode = Mips::BEQ;
break;
+ case Mips::BEQLImmMacro:
+ OpCode = Mips::BEQL;
+ IsLikely = true;
+ break;
+ case Mips::BNELImmMacro:
+ OpCode = Mips::BNEL;
+ IsLikely = true;
+ break;
default:
llvm_unreachable("Unknown immediate branch pseudo-instruction.");
break;
}
int64_t ImmValue = ImmOp.getImm();
- if (ImmValue == 0)
- TOut.emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc,
- STI);
- else {
+ if (ImmValue == 0) {
+ if (IsLikely) {
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO,
+ MCOperand::createExpr(MemOffsetOp.getExpr()), IDLoc, STI);
+ TOut.emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ } else
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc,
+ STI);
+ } else {
warnIfNoMacro(IDLoc);
unsigned ATReg = getATReg(IDLoc);
@@ -2797,7 +3053,12 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
IDLoc, Out, STI))
return true;
- TOut.emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, STI);
+ if (IsLikely) {
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), ATReg,
+ MCOperand::createExpr(MemOffsetOp.getExpr()), IDLoc, STI);
+ TOut.emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ } else
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, STI);
}
return false;
}
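
For the new branch-likely forms, a minimal sketch (operands and label illustrative): with a zero immediate, "bnel $4, 0, foo" should expand to

  bnel  $4, $zero, foo
  sll   $zero, $zero, 0

with the sll filling the delay slot; a non-zero immediate is first materialised into $at by loadImmediate and the comparison is made against $at instead.
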
@@ -2904,9 +3165,9 @@ bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
unsigned Opcode = Inst.getOpcode();
unsigned NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM32_MM : Mips::LWM32_MM;
- assert (Inst.getOperand(OpNum - 1).isImm() &&
- Inst.getOperand(OpNum - 2).isReg() &&
- Inst.getOperand(OpNum - 3).isReg() && "Invalid instruction operand.");
+ assert(Inst.getOperand(OpNum - 1).isImm() &&
+ Inst.getOperand(OpNum - 2).isReg() &&
+ Inst.getOperand(OpNum - 3).isReg() && "Invalid instruction operand.");
if (OpNum < 8 && Inst.getOperand(OpNum - 1).getImm() <= 60 &&
Inst.getOperand(OpNum - 1).getImm() >= 0 &&
@@ -3185,6 +3446,14 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
return false;
}
+// Expand an integer division macro.
+//
+// Notably we don't have to emit a warning when encountering $rt as the $zero
+// register, or 0 as an immediate. processInstruction() has already done that.
+//
+// The destination register can only be $zero when expanding (S)DivIMacro or
+// D(S)DivMacro.
+
bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI, const bool IsMips64,
const bool Signed) {
@@ -3200,67 +3469,88 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
assert(RsRegOp.isReg() && "expected register operand kind");
unsigned RsReg = RsRegOp.getReg();
- const MCOperand &RtRegOp = Inst.getOperand(2);
- assert(RtRegOp.isReg() && "expected register operand kind");
- unsigned RtReg = RtRegOp.getReg();
+ unsigned RtReg;
+ int64_t ImmValue;
+
+ const MCOperand &RtOp = Inst.getOperand(2);
+ assert((RtOp.isReg() || RtOp.isImm()) &&
+ "expected register or immediate operand kind");
+ if (RtOp.isReg())
+ RtReg = RtOp.getReg();
+ else
+ ImmValue = RtOp.getImm();
+
unsigned DivOp;
unsigned ZeroReg;
+ unsigned SubOp;
if (IsMips64) {
DivOp = Signed ? Mips::DSDIV : Mips::DUDIV;
ZeroReg = Mips::ZERO_64;
+ SubOp = Mips::DSUB;
} else {
DivOp = Signed ? Mips::SDIV : Mips::UDIV;
ZeroReg = Mips::ZERO;
+ SubOp = Mips::SUB;
}
bool UseTraps = useTraps();
- if (RsReg == Mips::ZERO || RsReg == Mips::ZERO_64) {
- if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)
- Warning(IDLoc, "dividing zero by zero");
- if (IsMips64) {
- if (Signed && (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)) {
- if (UseTraps) {
- TOut.emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, STI);
- return false;
- }
+ if (RtOp.isImm()) {
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ if (ImmValue == 0) {
+ if (UseTraps)
+ TOut.emitRRI(Mips::TEQ, ZeroReg, ZeroReg, 0x7, IDLoc, STI);
+ else
TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
- return false;
- }
+ return false;
+ }
+
+ if (ImmValue == 1) {
+ TOut.emitRRR(Mips::OR, RdReg, RsReg, Mips::ZERO, IDLoc, STI);
+ return false;
+ } else if (Signed && ImmValue == -1) {
+ TOut.emitRRR(SubOp, RdReg, ZeroReg, RsReg, IDLoc, STI);
+ return false;
} else {
- TOut.emitRR(DivOp, RsReg, RtReg, IDLoc, STI);
+ if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, isInt<32>(ImmValue),
+ false, Inst.getLoc(), Out, STI))
+ return true;
+ TOut.emitRR(DivOp, RsReg, ATReg, IDLoc, STI);
+ TOut.emitR(Mips::MFLO, RdReg, IDLoc, STI);
return false;
}
+ return true;
}
+ // If the macro expansion of (d)div(u) would always trap or break, insert
+ // the trap/break and exit. This gives a different result to GAS. GAS has
+ // an inconsistency/missed optimization in that not all cases are handled
+ // equivalently. As the observed behaviour is the same, we're ok.
if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) {
- Warning(IDLoc, "division by zero");
- if (Signed) {
- if (UseTraps) {
- TOut.emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, STI);
- return false;
- }
-
- TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
+ if (UseTraps) {
+ TOut.emitRRI(Mips::TEQ, ZeroReg, ZeroReg, 0x7, IDLoc, STI);
return false;
}
+ TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
+ return false;
}
- // FIXME: The values for these two BranchTarget variables may be different in
- // micromips. These magic numbers need to be removed.
- unsigned BranchTargetNoTraps;
- unsigned BranchTarget;
+  // Temporary label for the first branch target.
+ MCContext &Context = TOut.getStreamer().getContext();
+ MCSymbol *BrTarget;
+ MCOperand LabelOp;
if (UseTraps) {
- BranchTarget = IsMips64 ? 12 : 8;
TOut.emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, STI);
} else {
- BranchTarget = IsMips64 ? 20 : 16;
- BranchTargetNoTraps = 8;
// Branch to the li instruction.
- TOut.emitRRI(Mips::BNE, RtReg, ZeroReg, BranchTargetNoTraps, IDLoc, STI);
+ BrTarget = Context.createTempSymbol();
+ LabelOp = MCOperand::createExpr(MCSymbolRefExpr::create(BrTarget, Context));
+ TOut.emitRRX(Mips::BNE, RtReg, ZeroReg, LabelOp, IDLoc, STI);
}
TOut.emitRR(DivOp, RsReg, RtReg, IDLoc, STI);
@@ -3269,6 +3559,9 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
if (!Signed) {
+ if (!UseTraps)
+ TOut.getStreamer().EmitLabel(BrTarget);
+
TOut.emitR(Mips::MFLO, RdReg, IDLoc, STI);
return false;
}
@@ -3277,15 +3570,23 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
if (!ATReg)
return true;
+ if (!UseTraps)
+ TOut.getStreamer().EmitLabel(BrTarget);
+
TOut.emitRRI(Mips::ADDiu, ATReg, ZeroReg, -1, IDLoc, STI);
+
+ // Temporary label for the second branch target.
+ MCSymbol *BrTargetEnd = Context.createTempSymbol();
+ MCOperand LabelOpEnd =
+ MCOperand::createExpr(MCSymbolRefExpr::create(BrTargetEnd, Context));
+
+ // Branch to the mflo instruction.
+ TOut.emitRRX(Mips::BNE, RtReg, ATReg, LabelOpEnd, IDLoc, STI);
+
if (IsMips64) {
- // Branch to the mflo instruction.
- TOut.emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, STI);
TOut.emitRRI(Mips::ADDiu, ATReg, ZeroReg, 1, IDLoc, STI);
TOut.emitRRI(Mips::DSLL32, ATReg, ATReg, 0x1f, IDLoc, STI);
} else {
- // Branch to the mflo instruction.
- TOut.emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, STI);
TOut.emitRI(Mips::LUi, ATReg, (uint16_t)0x8000, IDLoc, STI);
}
@@ -3293,10 +3594,12 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitRRI(Mips::TEQ, RsReg, ATReg, 0x6, IDLoc, STI);
else {
// Branch to the mflo instruction.
- TOut.emitRRI(Mips::BNE, RsReg, ATReg, BranchTargetNoTraps, IDLoc, STI);
+ TOut.emitRRX(Mips::BNE, RsReg, ATReg, LabelOpEnd, IDLoc, STI);
TOut.emitRRI(Mips::SLL, ZeroReg, ZeroReg, 0, IDLoc, STI);
TOut.emitII(Mips::BREAK, 0x6, 0, IDLoc, STI);
}
+
+ TOut.getStreamer().EmitLabel(BrTargetEnd);
TOut.emitR(Mips::MFLO, RdReg, IDLoc, STI);
return false;
}
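
As a rough sketch of the label-based expansion, an unsigned "divu $2, $3, $4" with traps disabled should now emit something like

  bne   $4, $zero, $tmp0
  divu  $zero, $3, $4
  break 7
$tmp0:
  mflo  $2

where $tmp0 stands for the temporary symbol from createTempSymbol(). The signed case additionally compares $4 against -1 and $3 against the minimum signed value, branching over a second break to another temporary label placed just before the final mflo.
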
@@ -3503,10 +3806,10 @@ bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
MipsTargetStreamer &TOut = getTargetStreamer();
- assert (Inst.getNumOperands() == 3 && "Invalid operand count");
- assert (Inst.getOperand(0).isReg() &&
- Inst.getOperand(1).isReg() &&
- Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
unsigned ATReg = Mips::NoRegister;
unsigned FinalDstReg = Mips::NoRegister;
@@ -3514,7 +3817,7 @@ bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
unsigned SrcReg = Inst.getOperand(1).getReg();
int64_t ImmValue = Inst.getOperand(2).getImm();
- bool Is32Bit = isInt<32>(ImmValue) || isUInt<32>(ImmValue);
+ bool Is32Bit = isInt<32>(ImmValue) || (!isGP64bit() && isUInt<32>(ImmValue));
unsigned FinalOpcode = Inst.getOpcode();
@@ -3530,30 +3833,69 @@ bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
switch (FinalOpcode) {
default:
llvm_unreachable("unimplemented expansion");
- case (Mips::ADDi):
+ case Mips::ADDi:
FinalOpcode = Mips::ADD;
break;
- case (Mips::ADDiu):
+ case Mips::ADDiu:
FinalOpcode = Mips::ADDu;
break;
- case (Mips::ANDi):
+ case Mips::ANDi:
FinalOpcode = Mips::AND;
break;
- case (Mips::NORImm):
+ case Mips::NORImm:
FinalOpcode = Mips::NOR;
break;
- case (Mips::ORi):
+ case Mips::ORi:
FinalOpcode = Mips::OR;
break;
- case (Mips::SLTi):
+ case Mips::SLTi:
FinalOpcode = Mips::SLT;
break;
- case (Mips::SLTiu):
+ case Mips::SLTiu:
FinalOpcode = Mips::SLTu;
break;
- case (Mips::XORi):
+ case Mips::XORi:
FinalOpcode = Mips::XOR;
break;
+ case Mips::ADDi_MM:
+ FinalOpcode = Mips::ADD_MM;
+ break;
+ case Mips::ADDiu_MM:
+ FinalOpcode = Mips::ADDu_MM;
+ break;
+ case Mips::ANDi_MM:
+ FinalOpcode = Mips::AND_MM;
+ break;
+ case Mips::ORi_MM:
+ FinalOpcode = Mips::OR_MM;
+ break;
+ case Mips::SLTi_MM:
+ FinalOpcode = Mips::SLT_MM;
+ break;
+ case Mips::SLTiu_MM:
+ FinalOpcode = Mips::SLTu_MM;
+ break;
+ case Mips::XORi_MM:
+ FinalOpcode = Mips::XOR_MM;
+ break;
+ case Mips::ANDi64:
+ FinalOpcode = Mips::AND64;
+ break;
+ case Mips::NORImm64:
+ FinalOpcode = Mips::NOR64;
+ break;
+ case Mips::ORi64:
+ FinalOpcode = Mips::OR64;
+ break;
+ case Mips::SLTImm64:
+ FinalOpcode = Mips::SLT64;
+ break;
+ case Mips::SLTUImm64:
+ FinalOpcode = Mips::SLTu64;
+ break;
+ case Mips::XORi64:
+ FinalOpcode = Mips::XOR64;
+ break;
}
if (FinalDstReg == Mips::NoRegister)
@@ -3578,7 +3920,6 @@ bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
unsigned SecondShift = Mips::NOP;
if (hasMips32r2()) {
-
if (DReg == SReg) {
TmpReg = getATReg(Inst.getLoc());
if (!TmpReg)
@@ -3600,7 +3941,6 @@ bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
}
if (hasMips32()) {
-
switch (Inst.getOpcode()) {
default:
llvm_unreachable("unexpected instruction opcode");
@@ -3642,7 +3982,6 @@ bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc,
unsigned SecondShift = Mips::NOP;
if (hasMips32r2()) {
-
if (Inst.getOpcode() == Mips::ROLImm) {
uint64_t MaxShift = 32;
uint64_t ShiftValue = ImmValue;
@@ -3661,7 +4000,6 @@ bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc,
}
if (hasMips32()) {
-
if (ImmValue == 0) {
TOut.emitRRI(Mips::SRL, DReg, SReg, 0, Inst.getLoc(), STI);
return false;
@@ -3707,7 +4045,6 @@ bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
unsigned SecondShift = Mips::NOP;
if (hasMips64r2()) {
-
if (TmpReg == SReg) {
TmpReg = getATReg(Inst.getLoc());
if (!TmpReg)
@@ -3729,7 +4066,6 @@ bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
}
if (hasMips64()) {
-
switch (Inst.getOpcode()) {
default:
llvm_unreachable("unexpected instruction opcode");
@@ -3773,7 +4109,6 @@ bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
MCInst TmpInst;
if (hasMips64r2()) {
-
unsigned FinalOpcode = Mips::NOP;
if (ImmValue == 0)
FinalOpcode = Mips::DROTR;
@@ -3801,7 +4136,6 @@ bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
}
if (hasMips64()) {
-
if (ImmValue == 0) {
TOut.emitRRI(Mips::DSRL, DReg, SReg, 0, Inst.getLoc(), STI);
return false;
@@ -3871,6 +4205,119 @@ bool MipsAsmParser::expandAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
+bool MipsAsmParser::expandMulImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ int32_t ImmValue = Inst.getOperand(2).getImm();
+
+ ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ loadImmediate(ImmValue, ATReg, Mips::NoRegister, true, false, IDLoc, Out, STI);
+
+ TOut.emitRR(Inst.getOpcode() == Mips::MULImmMacro ? Mips::MULT : Mips::DMULT,
+ SrcReg, ATReg, IDLoc, STI);
+
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ return false;
+}
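
As a minimal sketch (constant illustrative), "mul $2, $3, 100" should expand to roughly

  addiu $1, $zero, 100
  mult  $3, $1
  mflo  $2

with loadImmediate free to use a longer lui/ori sequence when the constant does not fit a signed 16-bit immediate, and dmult used instead of mult for the 64-bit form of the macro.
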
+
+bool MipsAsmParser::expandMulO(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned TmpReg = Inst.getOperand(2).getReg();
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ TOut.emitRR(Inst.getOpcode() == Mips::MULOMacro ? Mips::MULT : Mips::DMULT,
+ SrcReg, TmpReg, IDLoc, STI);
+
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ TOut.emitRRI(Inst.getOpcode() == Mips::MULOMacro ? Mips::SRA : Mips::DSRA32,
+ DstReg, DstReg, 0x1F, IDLoc, STI);
+
+ TOut.emitR(Mips::MFHI, ATReg, IDLoc, STI);
+
+ if (useTraps()) {
+ TOut.emitRRI(Mips::TNE, DstReg, ATReg, 6, IDLoc, STI);
+ } else {
+    MCContext &Context = TOut.getStreamer().getContext();
+    MCSymbol *BrTarget = Context.createTempSymbol();
+ MCOperand LabelOp =
+ MCOperand::createExpr(MCSymbolRefExpr::create(BrTarget, Context));
+
+ TOut.emitRRX(Mips::BEQ, DstReg, ATReg, LabelOp, IDLoc, STI);
+ if (AssemblerOptions.back()->isReorder())
+ TOut.emitNop(IDLoc, STI);
+ TOut.emitII(Mips::BREAK, 6, 0, IDLoc, STI);
+
+ TOut.getStreamer().EmitLabel(BrTarget);
+ }
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ return false;
+}
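
A rough sketch of the overflow check when traps are disabled (temporary label illustrative): "mulo $2, $3, $4" should produce approximately

  mult  $3, $4
  mflo  $2
  sra   $2, $2, 31
  mfhi  $1
  beq   $2, $1, $tmp0
  nop
  break 6
$tmp0:
  mflo  $2

with the nop only emitted under .set reorder, and the beq/break pair replaced by "tne $2, $1, 6" when trapping expansions are enabled; the 64-bit form uses dmult and dsra32 in the same pattern.
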
+
+bool MipsAsmParser::expandMulOU(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned TmpReg = Inst.getOperand(2).getReg();
+
+ ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+      // DINSM
+ SrcReg, TmpReg, IDLoc, STI);
+
+ TOut.emitR(Mips::MFHI, ATReg, IDLoc, STI);
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+ if (useTraps()) {
+ TOut.emitRRI(Mips::TNE, ATReg, Mips::ZERO, 6, IDLoc, STI);
+ } else {
+    MCContext &Context = TOut.getStreamer().getContext();
+    MCSymbol *BrTarget = Context.createTempSymbol();
+ MCOperand LabelOp =
+ MCOperand::createExpr(MCSymbolRefExpr::create(BrTarget, Context));
+
+ TOut.emitRRX(Mips::BEQ, ATReg, Mips::ZERO, LabelOp, IDLoc, STI);
+ if (AssemblerOptions.back()->isReorder())
+ TOut.emitNop(IDLoc, STI);
+ TOut.emitII(Mips::BREAK, 6, 0, IDLoc, STI);
+
+ TOut.getStreamer().EmitLabel(BrTarget);
+ }
+
+ return false;
+}
+
+bool MipsAsmParser::expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned TmpReg = Inst.getOperand(2).getReg();
+
+ TOut.emitRR(Mips::DMULTu, SrcReg, TmpReg, IDLoc, STI);
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ return false;
+}
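
A minimal sketch: "dmul $2, $3, $4" handled here should expand to

  dmultu $3, $4
  mflo   $2

which is valid for signed operands as well, since the low 64 bits of a 64x64 product do not depend on signedness.
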
+
static unsigned nextReg(unsigned Reg) {
switch (Reg) {
case Mips::ZERO: return Mips::AT;
@@ -3985,7 +4432,6 @@ bool MipsAsmParser::expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI) {
-
warnIfNoMacro(IDLoc);
MipsTargetStreamer &TOut = getTargetStreamer();
@@ -4158,17 +4604,15 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
-
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
- case Match_Success: {
+ case Match_Success:
if (processInstruction(Inst, IDLoc, Out, STI))
return true;
return false;
- }
case Match_MissingFeature:
Error(IDLoc, "instruction requires a CPU feature not currently enabled");
return true;
@@ -4441,7 +4885,6 @@ int MipsAsmParser::matchHWRegsRegisterName(StringRef Name) {
}
int MipsAsmParser::matchFPURegisterName(StringRef Name) {
-
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
unsigned IntVal;
@@ -4455,7 +4898,6 @@ int MipsAsmParser::matchFPURegisterName(StringRef Name) {
}
int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
-
if (Name.startswith("fcc")) {
StringRef NumString = Name.substr(3);
unsigned IntVal;
@@ -4469,7 +4911,6 @@ int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
}
int MipsAsmParser::matchACRegisterName(StringRef Name) {
-
if (Name.startswith("ac")) {
StringRef NumString = Name.substr(2);
unsigned IntVal;
@@ -4511,6 +4952,10 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
return CC;
}
+bool MipsAsmParser::canUseATReg() {
+ return AssemblerOptions.back()->getATRegIndex() != 0;
+}
+
unsigned MipsAsmParser::getATReg(SMLoc Loc) {
unsigned ATIndex = AssemblerOptions.back()->getATRegIndex();
if (ATIndex == 0) {
@@ -4589,7 +5034,6 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
-
switch (Expr->getKind()) {
case MCExpr::Constant:
return true;
@@ -5522,7 +5966,7 @@ bool MipsAsmParser::parseSetPushDirective() {
// Create a copy of the current assembler options environment and push it.
AssemblerOptions.push_back(
- make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
+ llvm::make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
getTargetStreamer().emitDirectiveSetPush();
return false;
@@ -5914,6 +6358,14 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetAtDirective();
} else if (Tok.getString() == "arch") {
return parseSetArchDirective();
+ } else if (Tok.getString() == "bopt") {
+ Warning(Tok.getLoc(), "'bopt' feature is unsupported");
+ getParser().Lex();
+ return false;
+ } else if (Tok.getString() == "nobopt") {
+ // We're already running in nobopt mode, so nothing to do.
+ getParser().Lex();
+ return false;
} else if (Tok.getString() == "fp") {
return parseSetFpDirective();
} else if (Tok.getString() == "oddspreg") {
@@ -6001,7 +6453,7 @@ bool MipsAsmParser::parseDirectiveSet() {
bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
+ while (true) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
return true;
@@ -6773,3 +7225,15 @@ extern "C" void LLVMInitializeMipsAsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "MipsGenAsmMatcher.inc"
+
+bool MipsAsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {
+ // Find the appropriate table for this asm variant.
+ const MatchEntry *Start, *End;
+ switch (VariantID) {
+ default: llvm_unreachable("invalid variant!");
+ case 0: Start = std::begin(MatchTable0); End = std::end(MatchTable0); break;
+ }
+ // Search the table.
+ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode());
+ return MnemonicRange.first != MnemonicRange.second;
+}
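
The new helper binary-searches the generated MatchTable0 (using LessOpcode from MipsGenAsmMatcher.inc) and reports whether any entry carries the given mnemonic. A plausible caller, assuming surrounding parser code not shown here (sketch only):

  // Bail out early if the token cannot begin any known Mips instruction.
  if (!mnemonicIsValid(Mnemonic, /*VariantID=*/0))
    return Error(NameLoc, "unknown instruction");
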
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index f80efb18507b..ecdf6b0de6e7 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- MipsDisassembler.cpp - Disassembler for Mips -------------*- C++ -*-===//
+//===- MipsDisassembler.cpp - Disassembler for Mips -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,15 +12,21 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -33,6 +39,7 @@ namespace {
class MipsDisassembler : public MCDisassembler {
bool IsMicroMips;
bool IsBigEndian;
+
public:
MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool IsBigEndian)
: MCDisassembler(STI, Ctx),
@@ -42,9 +49,11 @@ public:
bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; }
bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; }
bool hasMips32() const { return STI.getFeatureBits()[Mips::FeatureMips32]; }
+
bool hasMips32r6() const {
return STI.getFeatureBits()[Mips::FeatureMips32r6];
}
+
bool isFP64() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; }
bool isGP64() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
@@ -527,11 +536,13 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
const void *Decoder);
namespace llvm {
+
Target &getTheMipselTarget();
Target &getTheMipsTarget();
Target &getTheMips64Target();
Target &getTheMips64elTarget();
-}
+
+} // end namespace llvm
static MCDisassembler *createMipsDisassembler(
const Target &T,
@@ -1106,6 +1117,7 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
raw_ostream &CStream) const {
uint32_t Insn;
DecodeStatus Result;
+ Size = 0;
if (IsMicroMips) {
Result = readInstruction16(Bytes, Address, Size, Insn, IsBigEndian);
@@ -1168,98 +1180,88 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
}
}
- // This is an invalid instruction. Let the disassembler move forward by the
- // minimum instruction size.
+ // This is an invalid instruction. Claim that the Size is 2 bytes. Since
+ // microMIPS instructions have a minimum alignment of 2, the next 2 bytes
+ // could form a valid instruction. The two bytes we rejected as an
+    // instruction could have actually been an inline constant pool that is
+ // unconditionally branched over.
Size = 2;
return MCDisassembler::Fail;
}
+ // Attempt to read the instruction so that we can attempt to decode it. If
+ // the buffer is not 4 bytes long, let the higher level logic figure out
+ // what to do with a size of zero and MCDisassembler::Fail.
Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
- if (Result == MCDisassembler::Fail) {
- Size = 4;
+ if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
- }
+
+ // The only instruction size for standard encoded MIPS.
+ Size = 4;
if (hasCOP3()) {
DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
Result =
decodeInstruction(DecoderTableCOP3_32, Instr, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips32r6() && isGP64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (GPR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips32r6() && isPTR64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (PTR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r6_PTR6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips32r6()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r632, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips2() && isPTR64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (PTR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32_64_PTR6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasCnMips()) {
DEBUG(dbgs() << "Trying CnMips table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableCnMips32, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (isGP64()) {
DEBUG(dbgs() << "Trying Mips64 (GPR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
Result =
decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
- Size = 4;
return MCDisassembler::Fail;
}
@@ -1267,16 +1269,13 @@ static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder) {
-
return MCDisassembler::Fail;
-
}
static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder) {
-
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1620,7 +1619,7 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
switch(Inst.getOpcode())
{
default:
- assert (0 && "Unexpected instruction");
+ assert(false && "Unexpected instruction");
return MCDisassembler::Fail;
break;
case Mips::LD_B:
@@ -1980,7 +1979,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
if (RegNo > 30 || RegNo %2)
return MCDisassembler::Fail;
- ;
unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo /2);
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
@@ -2128,7 +2126,6 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
-
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
Inst.addOperand(MCOperand::createImm(JumpOffset));
return MCDisassembler::Success;
@@ -2267,7 +2264,14 @@ static DecodeStatus DecodeInsSize(MCInst &Inst,
const void *Decoder) {
// First we need to grab the pos(lsb) from MCInst.
int Pos = Inst.getOperand(2).getImm();
- int Size = (int) Insn - Pos + 1;
+ if (Inst.getOpcode() == Mips::DINSU)
+ Pos += 32;
+ int Size;
+ if (Inst.getOpcode() == Mips::DINSM ||
+ Inst.getOpcode() == Mips::DINSU)
+ Size = (int) Insn - Pos + 33;
+ else
+ Size = (int) Insn - Pos + 1;
Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size)));
return MCDisassembler::Success;
}
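
A worked example of the adjusted decoding (field values illustrative): for a plain DINS with pos 4 and a raw msb field of 19, Size = 19 - 4 + 1 = 16. For DINSU the position operand read above is the raw pos - 32 field, so a raw pos of 4 with a raw size field of 7 first becomes Pos = 36 and then Size = 7 - 36 + 33 = 4, i.e. a 4-bit insert starting at bit 36.
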
@@ -2363,7 +2367,6 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
-
unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
switch (RegPair) {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 932d38a0b9fe..4a2b75b9ae46 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -1,4 +1,4 @@
-//===-- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---*- C++ -*--===//
+//===- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsABIFlagsSection.h"
+#include "MCTargetDesc/MipsABIFlagsSection.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MipsABIFlags.h"
using namespace llvm;
@@ -51,6 +55,7 @@ uint8_t MipsABIFlagsSection::getCPR1SizeValue() {
}
namespace llvm {
+
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
// Write out a Elf_Internal_ABIFlags_v0 struct
OS.EmitIntValue(ABIFlagsSection.getVersionValue(), 2); // version
@@ -66,4 +71,5 @@ MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
return OS;
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index 3966cae9fe33..f38541027023 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -1,4 +1,4 @@
-//===-- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -----*- C++ -*--===//
+//===- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,9 +10,10 @@
#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MipsABIFlags.h"
+#include <cstdint>
namespace llvm {
@@ -23,36 +24,32 @@ struct MipsABIFlagsSection {
enum class FpABIKind { ANY, XX, S32, S64, SOFT };
// Version of flags structure.
- uint16_t Version;
+ uint16_t Version = 0;
// The level of the ISA: 1-5, 32, 64.
- uint8_t ISALevel;
+ uint8_t ISALevel = 0;
// The revision of ISA: 0 for MIPS V and below, 1-n otherwise.
- uint8_t ISARevision;
+ uint8_t ISARevision = 0;
// The size of general purpose registers.
- Mips::AFL_REG GPRSize;
+ Mips::AFL_REG GPRSize = Mips::AFL_REG_NONE;
// The size of co-processor 1 registers.
- Mips::AFL_REG CPR1Size;
+ Mips::AFL_REG CPR1Size = Mips::AFL_REG_NONE;
// The size of co-processor 2 registers.
- Mips::AFL_REG CPR2Size;
+ Mips::AFL_REG CPR2Size = Mips::AFL_REG_NONE;
// Processor-specific extension.
- Mips::AFL_EXT ISAExtension;
+ Mips::AFL_EXT ISAExtension = Mips::AFL_EXT_NONE;
// Mask of ASEs used.
- uint32_t ASESet;
+ uint32_t ASESet = 0;
- bool OddSPReg;
+ bool OddSPReg = false;
- bool Is32BitABI;
+ bool Is32BitABI = false;
protected:
// The floating-point ABI.
- FpABIKind FpABI;
+ FpABIKind FpABI = FpABIKind::ANY;
public:
- MipsABIFlagsSection()
- : Version(0), ISALevel(0), ISARevision(0), GPRSize(Mips::AFL_REG_NONE),
- CPR1Size(Mips::AFL_REG_NONE), CPR2Size(Mips::AFL_REG_NONE),
- ISAExtension(Mips::AFL_EXT_NONE), ASESet(0), OddSPReg(false),
- Is32BitABI(false), FpABI(FpABIKind::ANY) {}
+ MipsABIFlagsSection() = default;
uint16_t getVersionValue() { return (uint16_t)Version; }
uint8_t getISALevelValue() { return (uint8_t)ISALevel; }
@@ -80,6 +77,7 @@ public:
FpABI = Value;
Is32BitABI = IsABI32Bit;
}
+
StringRef getFpABIString(FpABIKind Value);
template <class PredicateLibrary>
@@ -195,6 +193,7 @@ public:
};
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection);
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 38b11f78e36d..3304449efb91 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
// Prepare value for the target space for it
static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext *Ctx = nullptr) {
+ MCContext &Ctx) {
unsigned Kind = Fixup.getKind();
@@ -74,8 +74,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// address range. Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isInt<16>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC16 fixup");
return 0;
}
break;
@@ -84,8 +84,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 19-bit signed immediate.
- if (!isInt<19>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC19 fixup");
+ if (!isInt<19>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC19 fixup");
return 0;
}
break;
@@ -121,8 +121,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 7-bit signed immediate.
- if (!isInt<7>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC7 fixup");
+ if (!isInt<7>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC7 fixup");
return 0;
}
break;
@@ -131,8 +131,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 10-bit signed immediate.
- if (!isInt<10>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC10 fixup");
+ if (!isInt<10>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC10 fixup");
return 0;
}
break;
@@ -141,8 +141,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isInt<16>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC16 fixup");
return 0;
}
break;
@@ -150,21 +150,21 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 8;
// We now check if Value can be encoded as a 18-bit signed immediate.
- if (!isInt<18>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ if (!isInt<18>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup");
return 0;
}
break;
case Mips::fixup_MICROMIPS_PC18_S3:
// Check alignment.
- if ((Value & 7) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ if ((Value & 7)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup");
}
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 8;
// We now check if Value can be encoded as a 18-bit signed immediate.
- if (!isInt<18>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ if (!isInt<18>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup");
return 0;
}
break;
@@ -172,8 +172,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 21-bit signed immediate.
- if (!isInt<21>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup");
+ if (!isInt<21>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC21 fixup");
return 0;
}
break;
@@ -181,8 +181,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 26-bit signed immediate.
- if (!isInt<26>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC26 fixup");
+ if (!isInt<26>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC26 fixup");
return 0;
}
break;
@@ -190,8 +190,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 26-bit signed immediate.
- if (!isInt<26>(Value) && Ctx) {
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
+ if (!isInt<26>(Value)) {
+ Ctx.reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
return 0;
}
break;
@@ -199,8 +199,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 21-bit signed immediate.
- if (!isInt<21>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup");
+ if (!isInt<21>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC21 fixup");
return 0;
}
break;
@@ -236,10 +236,10 @@ static unsigned calculateMMLEIndex(unsigned i) {
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ unsigned DataSize, uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const {
MCFixupKind Kind = Fixup.getKind();
- Value = adjustFixupValue(Fixup, Value);
+ Value = adjustFixupValue(Fixup, Value, Ctx);
if (!Value)
return; // Doesn't change encoding.
@@ -471,24 +471,6 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-/// processFixupValue - Target hook to process the literal value of a fixup
-/// if necessary.
-void MipsAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target,
- uint64_t &Value,
- bool &IsResolved) {
- // At this point we'll ignore the value returned by adjustFixupValue as
- // we are only checking if the fixup can be applied correctly. We have
- // access to MCContext from here which allows us to report a fatal error
- // with *possibly* a source code location.
- // The caller will also ignore any changes we make to Value
- // (recordRelocation() overwrites it with it's own calculation).
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
-}
-
// MCAsmBackend
MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index f260cfa566c9..4b3cc6e21f4c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -39,7 +39,7 @@ public:
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
@@ -82,11 +82,6 @@ public:
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
-
}; // class MipsAsmBackend
} // namespace
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index b2efd726da53..324fd3c6fe14 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -7,33 +7,38 @@
//
//===----------------------------------------------------------------------===//
-#include <algorithm>
-#include <list>
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
-#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <list>
+#include <utility>
#define DEBUG_TYPE "mips-elf-object-writer"
using namespace llvm;
namespace {
+
/// Holds additional information needed by the relocation ordering algorithm.
struct MipsRelocationEntry {
const ELFRelocationEntry R; ///< The relocation.
- bool Matched; ///< Is this relocation part of a match.
+ bool Matched = false; ///< Is this relocation part of a match.
- MipsRelocationEntry(const ELFRelocationEntry &R) : R(R), Matched(false) {}
+ MipsRelocationEntry(const ELFRelocationEntry &R) : R(R) {}
void print(raw_ostream &Out) const {
R.print(Out);
@@ -53,23 +58,33 @@ public:
MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64,
bool IsLittleEndian);
- ~MipsELFObjectWriter() override;
+ ~MipsELFObjectWriter() override = default;
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
- virtual void sortRelocs(const MCAssembler &Asm,
- std::vector<ELFRelocationEntry> &Relocs) override;
+ void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) override;
+};
+
+/// The possible results of the Predicate function used by find_best.
+enum FindBestPredicateResult {
+ FindBest_NoMatch = 0, ///< The current element is not a match.
+ FindBest_Match, ///< The current element is a match but better ones are
+ /// possible.
+ FindBest_PerfectMatch, ///< The current element is an unbeatable match.
};
+} // end anonymous namespace
+
/// Copy elements in the range [First, Last) to d1 when the predicate is true or
/// d2 when the predicate is false. This is essentially both std::copy_if and
/// std::remove_copy_if combined into a single pass.
template <class InputIt, class OutputIt1, class OutputIt2, class UnaryPredicate>
-std::pair<OutputIt1, OutputIt2> copy_if_else(InputIt First, InputIt Last,
- OutputIt1 d1, OutputIt2 d2,
- UnaryPredicate Predicate) {
+static std::pair<OutputIt1, OutputIt2> copy_if_else(InputIt First, InputIt Last,
+ OutputIt1 d1, OutputIt2 d2,
+ UnaryPredicate Predicate) {
for (InputIt I = First; I != Last; ++I) {
if (Predicate(*I)) {
*d1 = *I;
@@ -83,14 +98,6 @@ std::pair<OutputIt1, OutputIt2> copy_if_else(InputIt First, InputIt Last,
return std::make_pair(d1, d2);
}
-/// The possible results of the Predicate function used by find_best.
-enum FindBestPredicateResult {
- FindBest_NoMatch = 0, ///< The current element is not a match.
- FindBest_Match, ///< The current element is a match but better ones are
- /// possible.
- FindBest_PerfectMatch, ///< The current element is an unbeatable match.
-};
-
/// Find the best match in the range [First, Last).
///
/// An element matches when Predicate(X) returns FindBest_Match or
@@ -101,8 +108,8 @@ enum FindBestPredicateResult {
/// This is similar to std::find_if but finds the best of multiple possible
/// matches.
template <class InputIt, class UnaryPredicate, class Comparator>
-InputIt find_best(InputIt First, InputIt Last, UnaryPredicate Predicate,
- Comparator BetterThan) {
+static InputIt find_best(InputIt First, InputIt Last, UnaryPredicate Predicate,
+ Comparator BetterThan) {
InputIt Best = Last;
for (InputIt I = First; I != Last; ++I) {
@@ -202,16 +209,12 @@ static void dumpRelocs(const char *Prefix, const Container &Relocs) {
}
#endif
-} // end anonymous namespace
-
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
: MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
/*HasRelocationAddend*/ _isN64,
/*IsN64*/ _isN64) {}
-MipsELFObjectWriter::~MipsELFObjectWriter() {}
-
unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
@@ -419,7 +422,6 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
/// always match using the expressions from the source.
void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
-
// We do not need to sort the relocation table for RELA relocations which
// N32/N64 uses as the relocation addend contains the value we require,
// rather than it being split across a pair of relocations.
@@ -524,6 +526,8 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_GOT16:
case ELF::R_MIPS16_GOT16:
case ELF::R_MICROMIPS_GOT16:
+ case ELF::R_MIPS_HIGHER:
+ case ELF::R_MIPS_HIGHEST:
case ELF::R_MIPS_HI16:
case ELF::R_MIPS16_HI16:
case ELF::R_MICROMIPS_HI16:
@@ -567,8 +571,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_INSERT_A:
case ELF::R_MIPS_INSERT_B:
case ELF::R_MIPS_DELETE:
- case ELF::R_MIPS_HIGHER:
- case ELF::R_MIPS_HIGHEST:
case ELF::R_MIPS_CALL_HI16:
case ELF::R_MIPS_CALL_LO16:
case ELF::R_MIPS_SCN_DISP:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index e7d687e89a8a..ae3278322311 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -8,15 +8,19 @@
//===----------------------------------------------------------------------===//
#include "MipsELFStreamer.h"
+#include "MipsOptionRecord.h"
#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
MCELFStreamer::EmitInstruction(Inst, STI);
MCContext &Context = getContext();
@@ -51,7 +55,7 @@ void MipsELFStreamer::createPendingLabelRelocs() {
Labels.clear();
}
-void MipsELFStreamer::EmitLabel(MCSymbol *Symbol) {
+void MipsELFStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
MCELFStreamer::EmitLabel(Symbol);
Labels.push_back(Symbol);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index a241cdebdcc8..f5eda112817e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -1,4 +1,4 @@
-//===-------- MipsELFStreamer.h - ELF Object Output -----------------------===//
+//===- MipsELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,7 @@
#include <memory>
namespace llvm {
+
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
@@ -31,12 +32,10 @@ class MipsELFStreamer : public MCELFStreamer {
MipsRegInfoRecord *RegInfoRecord;
SmallVector<MCSymbol*, 4> Labels;
-
public:
MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter)
: MCELFStreamer(Context, MAB, OS, Emitter) {
-
RegInfoRecord = new MipsRegInfoRecord(this, Context);
MipsOptionRecords.push_back(
std::unique_ptr<MipsRegInfoRecord>(RegInfoRecord));
@@ -46,12 +45,13 @@ public:
/// \p Inst is actually emitted. For example, we can inspect the operands and
/// gather sufficient information that allows us to reason about the register
/// usage for the translation unit.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool = false) override;
/// Overriding this function allows us to record all labels that should be
/// marked as microMIPS. Based on this data marking is done in
/// EmitInstruction.
- void EmitLabel(MCSymbol *Symbol) override;
+ void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
/// Overriding this function allows us to dismiss all labels that are
/// candidates for marking as microMIPS when .section directive is processed.
@@ -72,5 +72,6 @@ public:
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll);
-} // namespace llvm.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index a44a35f49e5f..ebe3c5784888 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -19,9 +19,7 @@ using namespace llvm;
void MipsMCAsmInfo::anchor() { }
MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
- if ((TheTriple.getArch() == Triple::mips) ||
- (TheTriple.getArch() == Triple::mips64))
- IsLittleEndian = false;
+ IsLittleEndian = TheTriple.isLittleEndian();
if ((TheTriple.getArch() == Triple::mips64el) ||
(TheTriple.getArch() == Triple::mips64)) {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 0614316d5ac7..5685f0426e9b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -10,22 +10,29 @@
// This file implements the MipsMCCodeEmitter class.
//
//===----------------------------------------------------------------------===//
-//
-#include "MipsMCCodeEmitter.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "MipsMCCodeEmitter.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -34,6 +41,7 @@
#undef GET_INSTRMAP_INFO
namespace llvm {
+
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
@@ -45,12 +53,12 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, true);
}
-} // End of namespace llvm.
+
+} // end namespace llvm
// If the D<shift> instruction has a shift amount that is greater
// than 31 (checked in calling routine), lower it to a D<shift>32 instruction
static void LowerLargeShift(MCInst& Inst) {
-
assert(Inst.getNumOperands() == 3 && "Invalid no. of operands for shift!");
assert(Inst.getOperand(2).isImm());
@@ -103,24 +111,25 @@ static void LowerDins(MCInst& InstIn) {
assert(InstIn.getOperand(3).isImm());
int64_t size = InstIn.getOperand(3).getImm();
- if (size <= 32) {
- if (pos < 32) // DINS, do nothing
- return;
+ assert((pos + size) <= 64 &&
+ "DINS cannot have position plus size over 64");
+ if (pos < 32) {
+ if ((pos + size) > 0 && (pos + size) <= 32)
+ return; // DINS, do nothing
+ else if ((pos + size) > 32) {
+ //DINSM
+ InstIn.getOperand(3).setImm(size - 32);
+ InstIn.setOpcode(Mips::DINSM);
+ }
+ } else if ((pos + size) > 32 && (pos + size) <= 64) {
// DINSU
InstIn.getOperand(2).setImm(pos - 32);
InstIn.setOpcode(Mips::DINSU);
- return;
}
- // DINSM
- assert(pos < 32 && "DINS cannot have both size and pos > 32");
- InstIn.getOperand(3).setImm(size - 32);
- InstIn.setOpcode(Mips::DINSM);
- return;
}
// Fix a bad compact branch encoding for beqc/bnec.
void MipsMCCodeEmitter::LowerCompactBranch(MCInst& Inst) const {
-
// Encoding may be illegal !(rs < rt), but this situation is
// easily fixed.
unsigned RegOp0 = Inst.getOperand(0).getReg();
@@ -146,7 +155,6 @@ void MipsMCCodeEmitter::LowerCompactBranch(MCInst& Inst) const {
Inst.getOperand(0).setReg(RegOp1);
Inst.getOperand(1).setReg(RegOp0);
-
}
bool MipsMCCodeEmitter::isMicroMips(const MCSubtargetInfo &STI) const {
@@ -186,7 +194,6 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const
{
-
// Non-pseudo instructions that get changed for direct object
// only based on operand values.
// If this list of instructions get much longer we will move
@@ -272,7 +279,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -295,7 +301,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValue1SImm16(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -318,7 +323,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueMMR6(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -342,7 +346,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueLsl2MMR6(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -366,7 +369,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget7OpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -388,7 +390,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueMMPC10(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -410,7 +411,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -433,7 +433,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -456,7 +455,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget21OpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -479,7 +477,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -501,7 +498,6 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
unsigned MipsMCCodeEmitter::getBranchTarget26OpValueMM(
const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -525,7 +521,6 @@ unsigned MipsMCCodeEmitter::
getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) return MO.getImm();
@@ -544,7 +539,6 @@ unsigned MipsMCCodeEmitter::
getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
if (MO.isImm()) return MO.getImm()>>2;
@@ -562,7 +556,6 @@ unsigned MipsMCCodeEmitter::
getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
if (MO.isImm()) return MO.getImm() >> 1;
@@ -580,7 +573,6 @@ unsigned MipsMCCodeEmitter::
getUImm5Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
// The immediate is encoded as 'immediate << 2'.
@@ -599,7 +591,6 @@ unsigned MipsMCCodeEmitter::
getSImm3Lsa2Value(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
int Value = MO.getImm();
@@ -613,7 +604,6 @@ unsigned MipsMCCodeEmitter::
getUImm6Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
unsigned Value = MO.getImm();
@@ -627,7 +617,6 @@ unsigned MipsMCCodeEmitter::
getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
unsigned Binary = (MO.getImm() >> 2) & 0x0000ffff;
@@ -711,7 +700,7 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
case MipsMCExpr::MEK_GPREL:
FixupKind = Mips::fixup_Mips_GPREL16;
break;
- case MipsMCExpr::MEK_LO: {
+ case MipsMCExpr::MEK_LO:
// Check for %lo(%neg(%gp_rel(X)))
if (MipsExpr->isGpOff()) {
FixupKind = Mips::fixup_Mips_GPOFF_LO;
@@ -720,7 +709,6 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_LO16
: Mips::fixup_Mips_LO16;
break;
- }
case MipsMCExpr::MEK_HIGHEST:
FixupKind = Mips::fixup_Mips_HIGHEST;
break;
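
The branch- and jump-target hunks above all follow one shape: an immediate destination is stored pre-scaled (divided by 4 for standard MIPS encodings, by 2 for microMIPS), while a symbolic destination records a relocation fixup instead. A minimal sketch of that scaling, assuming the usual MIPS instruction granularities; the helper name is illustrative and not part of the emitter:

#include <cstdint>

// Illustrative helper, not the real getBranchTargetOpValue*: immediate
// branch/jump offsets are encoded in instruction-size units.
uint64_t encodeBranchTargetImm(uint64_t ByteOffset, bool MicroMips) {
  // microMIPS instructions are 2-byte aligned, classic MIPS are 4-byte aligned.
  return MicroMips ? (ByteOffset >> 1) : (ByteOffset >> 2);
}
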
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 2d041dcbf040..d12d3195521a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -1,4 +1,4 @@
-//===-- MipsMCCodeEmitter.h - Convert Mips Code to Machine Code -----------===//
+//===- MipsMCCodeEmitter.h - Convert Mips Code to Machine Code --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,29 +10,25 @@
// This file defines the MipsMCCodeEmitter class.
//
//===----------------------------------------------------------------------===//
-//
#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/Support/DataTypes.h"
-
-using namespace llvm;
+#include <cstdint>
namespace llvm {
+
class MCContext;
class MCExpr;
+class MCFixup;
class MCInst;
class MCInstrInfo;
-class MCFixup;
class MCOperand;
class MCSubtargetInfo;
class raw_ostream;
class MipsMCCodeEmitter : public MCCodeEmitter {
- MipsMCCodeEmitter(const MipsMCCodeEmitter &) = delete;
- void operator=(const MipsMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
bool IsLittleEndian;
@@ -43,8 +39,9 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle)
: MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
-
- ~MipsMCCodeEmitter() override {}
+ MipsMCCodeEmitter(const MipsMCCodeEmitter &) = delete;
+ MipsMCCodeEmitter &operator=(const MipsMCCodeEmitter &) = delete;
+ ~MipsMCCodeEmitter() override = default;
void EmitByte(unsigned char C, raw_ostream &OS) const;
@@ -270,9 +267,11 @@ public:
unsigned getRegisterListOpValue16(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- private:
+
+private:
void LowerCompactBranch(MCInst& Inst) const;
-}; // class MipsMCCodeEmitter
-} // namespace llvm.
+};
+
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
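
The header cleanup above replaces the old pattern of private, never-defined copy operations with publicly deleted ones and a defaulted destructor. A generic sketch of the idiom, with a made-up class name:

// Sketch only: deleting the copy operations makes misuse fail to compile
// with a clear diagnostic instead of failing at link time.
class NonCopyableEmitter {
public:
  NonCopyableEmitter() = default;
  NonCopyableEmitter(const NonCopyableEmitter &) = delete;
  NonCopyableEmitter &operator=(const NonCopyableEmitter &) = delete;
  ~NonCopyableEmitter() = default;
};
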
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 082bb87fcb8a..be04480044d4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -11,9 +11,15 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
using namespace llvm;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index d1a4334ec640..495d525ccff4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -1,4 +1,4 @@
-//===-- MipsMCExpr.h - Mips specific MC expression classes ------*- C++ -*-===//
+//===- MipsMCExpr.h - Mips specific MC expression classes -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -70,6 +70,7 @@ public:
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
+
MCFragment *findAssociatedFragment() const override {
return getSubExpr()->findAssociatedFragment();
}
@@ -86,6 +87,7 @@ public:
return isGpOff(Kind);
}
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCEXPR_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index aef9bd3a8e2a..9266f0e216d1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -20,7 +20,11 @@
#include "Mips.h"
#include "MipsELFStreamer.h"
#include "MipsMCNaCl.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
using namespace llvm;
@@ -38,14 +42,14 @@ class MipsNaClELFStreamer : public MipsELFStreamer {
public:
MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
- : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {}
+ : MipsELFStreamer(Context, TAB, OS, Emitter) {}
- ~MipsNaClELFStreamer() override {}
+ ~MipsNaClELFStreamer() override = default;
private:
// Whether we started the sandboxing sequence for calls. Calls are bundled
// with branch delays and aligned to the bundle end.
- bool PendingCall;
+ bool PendingCall = false;
bool isIndirectJump(const MCInst &MI) {
if (MI.getOpcode() == Mips::JALR) {
@@ -135,8 +139,8 @@ private:
public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to mask dangerous instructions.
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
// Sandbox indirect jumps.
if (isIndirectJump(Inst)) {
if (PendingCall)
@@ -265,4 +269,4 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
return S;
}
-}
+} // end namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 24b602810d6e..74d5e4cc9841 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -1,4 +1,4 @@
-//===-- MipsOptionRecord.cpp - Abstraction for storing information --------===//
+//===- MipsOptionRecord.cpp - Abstraction for storing information ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsOptionRecord.h"
+#include "MipsABIInfo.h"
#include "MipsELFStreamer.h"
+#include "MipsOptionRecord.h"
#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+#include <cassert>
using namespace llvm;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 7f79eb400f59..2d4083b27ed1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsTargetStreamer.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsELFStreamer.h"
@@ -685,6 +686,17 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
// issues as well.
unsigned EFlags = MCA.getELFHeaderEFlags();
+  // FIXME: Fix a dependency issue by instantiating the ABI object to a
+  // default based on the triple. The triple doesn't describe the target
+  // fully, but any external user of the API that uses the MCTargetStreamer
+  // would otherwise crash on an assertion failure.
+
+ ABI = MipsABIInfo(
+ STI.getTargetTriple().getArch() == Triple::ArchType::mipsel ||
+ STI.getTargetTriple().getArch() == Triple::ArchType::mips
+ ? MipsABIInfo::O32()
+ : MipsABIInfo::N64());
+
// Architecture
if (Features[Mips::FeatureMips64r6])
EFlags |= ELF::EF_MIPS_ARCH_64R6;
@@ -721,23 +733,18 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
if (Features[Mips::FeatureNaN2008])
EFlags |= ELF::EF_MIPS_NAN2008;
- // -mabicalls and -mplt are not implemented but we should act as if they were
- // given.
- EFlags |= ELF::EF_MIPS_CPIC;
-
MCA.setELFHeaderEFlags(EFlags);
}
void MipsTargetELFStreamer::emitLabel(MCSymbol *S) {
auto *Symbol = cast<MCSymbolELF>(S);
- if (!isMicroMipsEnabled())
- return;
getStreamer().getAssembler().registerSymbol(*Symbol);
uint8_t Type = Symbol->getType();
if (Type != ELF::STT_FUNC)
return;
- Symbol->setOther(ELF::STO_MIPS_MICROMIPS);
+ if (isMicroMipsEnabled())
+ Symbol->setOther(ELF::STO_MIPS_MICROMIPS);
}
void MipsTargetELFStreamer::finish() {
@@ -795,10 +802,13 @@ void MipsTargetELFStreamer::finish() {
} else if (Features[Mips::FeatureMips64r2] || Features[Mips::FeatureMips64])
EFlags |= ELF::EF_MIPS_32BITMODE;
- // If we've set the cpic eflag and we're n64, go ahead and set the pic
- // one as well.
- if (EFlags & ELF::EF_MIPS_CPIC && getABI().IsN64())
- EFlags |= ELF::EF_MIPS_PIC;
+ // -mplt is not implemented but we should act as if it was
+ // given.
+ if (!Features[Mips::FeatureNoABICalls])
+ EFlags |= ELF::EF_MIPS_CPIC;
+
+ if (Pic)
+ EFlags |= ELF::EF_MIPS_PIC | ELF::EF_MIPS_CPIC;
MCA.setELFHeaderEFlags(EFlags);
@@ -904,10 +914,10 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
const MCExpr *Size = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context),
ExprRef, Context);
- int64_t AbsSize;
- if (!Size->evaluateAsAbsolute(AbsSize, MCA))
- llvm_unreachable("Function size must be evaluatable as absolute");
- Size = MCConstantExpr::create(AbsSize, Context);
+
+ // The ELFObjectWriter can determine the absolute size as it has access to
+ // the layout information of the assembly file, so a size expression rather
+ // than an absolute value is ok here.
static_cast<MCSymbolELF *>(Sym)->setSize(Size);
}
diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td
index 05aad515da46..6b7f39e9dd79 100644
--- a/lib/Target/Mips/MicroMips64r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -475,29 +475,11 @@ defm : MaterializeImms<i64, ZERO_64, DADDIU_MM64R6, LUi64, ORi64>;
//
//===----------------------------------------------------------------------===//
-def : MipsPat<(MipsLo tglobaladdr:$in),
- (DADDIU_MM64R6 ZERO_64, tglobaladdr:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tblockaddress:$in),
- (DADDIU_MM64R6 ZERO_64, tblockaddress:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tjumptable:$in),
- (DADDIU_MM64R6 ZERO_64, tjumptable:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tconstpool:$in),
- (DADDIU_MM64R6 ZERO_64, tconstpool:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tglobaltlsaddr:$in),
- (DADDIU_MM64R6 ZERO_64, tglobaltlsaddr:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo texternalsym:$in),
- (DADDIU_MM64R6 ZERO_64, texternalsym:$in)>, ISA_MICROMIPS64R6;
-
-def : MipsPat<(add GPR64:$hi, (MipsLo tglobaladdr:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tglobaladdr:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tblockaddress:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tblockaddress:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tjumptable:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tjumptable:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tconstpool:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tconstpool:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tglobaltlsaddr:$lo)>, ISA_MICROMIPS64R6;
+defm : MipsHiLoRelocs<LUi64, DADDIU_MM64R6, ZERO_64, GPR64Opnd>, SYM_32,
+ ISA_MICROMIPS64R6;
+
+defm : MipsHighestHigherHiLoRelocs<LUi64, DADDIU_MM64R6>, SYM_64,
+ ISA_MICROMIPS64R6;
def : MipsPat<(addc GPR64:$lhs, GPR64:$rhs),
(DADDU_MM64R6 GPR64:$lhs, GPR64:$rhs)>, ISA_MICROMIPS64R6;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index c0de9e7390a4..ee554bc7f69a 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1136,12 +1136,6 @@ let Predicates = [InMicroMips] in {
def : MipsInstAlias<
"sgtu $rs, $rt",
(SLTu_MM GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
- def : MipsInstAlias<"slt $rs, $rt, $imm",
- (SLTi_MM GPR32Opnd:$rs, GPR32Opnd:$rt,
- simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<"sltu $rs, $rt, $imm",
- (SLTiu_MM GPR32Opnd:$rs, GPR32Opnd:$rt,
- simm32_relaxed:$imm), 0>;
def : MipsInstAlias<"sll $rd, $rt, $rs",
(SLLV_MM GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"sra $rd, $rt, $rs",
@@ -1163,18 +1157,21 @@ let Predicates = [InMicroMips] in {
def : MipsInstAlias<"rotr $rt, $imm",
(ROTR_MM GPR32Opnd:$rt, GPR32Opnd:$rt, uimm5:$imm), 0>;
def : MipsInstAlias<"syscall", (SYSCALL_MM 0), 1>;
- def : MipsInstAlias<"and $rs, $rt, $imm",
- (ANDi_MM GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
- def : MipsInstAlias<"and $rs, $imm",
- (ANDi_MM GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>;
- def : MipsInstAlias<"or $rs, $rt, $imm",
- (ORi_MM GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
- def : MipsInstAlias<"or $rs, $imm",
- (ORi_MM GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
- def : MipsInstAlias<"xor $rs, $rt, $imm",
- (XORi_MM GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
- def : MipsInstAlias<"xor $rs, $imm",
- (XORi_MM GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"add", ADDi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"addu", ADDiu_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"and", ANDi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"or", ORi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"slt", SLTi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"sltu", SLTiu_MM>;
+
def : MipsInstAlias<"not $rt, $rs",
(NOR_MM GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
def : MipsInstAlias<"not $rt",
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 670272d47e95..9615bc38bfce 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -156,6 +156,8 @@ def FeatureMips64r6 : SubtargetFeature<"mips64r6", "MipsArchVersion",
"Mips64r6 ISA Support [experimental]",
[FeatureMips32r6, FeatureMips64r5,
FeatureNaN2008]>;
+def FeatureSym32 : SubtargetFeature<"sym32", "HasSym32", "true",
+                                    "Symbols are 32-bit on Mips64">;
def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
"Mips16 mode">;
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 191006d6463c..a71b161b24cc 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -405,7 +405,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
"__mips16_ret_dc"
};
const char *Name = Helper[RV];
- AttributeSet A;
+ AttributeList A;
Value *Params[] = {RVal};
Modified = true;
//
@@ -414,13 +414,13 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
// during call setup, the proper call lowering to the helper
// functions will take place.
//
- A = A.addAttribute(C, AttributeSet::FunctionIndex,
+ A = A.addAttribute(C, AttributeList::FunctionIndex,
"__Mips16RetHelper");
- A = A.addAttribute(C, AttributeSet::FunctionIndex,
+ A = A.addAttribute(C, AttributeList::FunctionIndex,
Attribute::ReadNone);
- A = A.addAttribute(C, AttributeSet::FunctionIndex,
+ A = A.addAttribute(C, AttributeList::FunctionIndex,
Attribute::NoInline);
- Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr));
+ Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T));
CallInst::Create(F, Params, "", &I);
} else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
FunctionType *FT = CI->getFunctionType();
@@ -490,15 +490,15 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
// remove the use-soft-float attribute
//
static void removeUseSoftFloat(Function &F) {
- AttributeSet A;
+ AttributeList A;
DEBUG(errs() << "removing -use-soft-float\n");
- A = A.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
+ A = A.addAttribute(F.getContext(), AttributeList::FunctionIndex,
"use-soft-float", "false");
- F.removeAttributes(AttributeSet::FunctionIndex, A);
+ F.removeAttributes(AttributeList::FunctionIndex, A);
if (F.hasFnAttribute("use-soft-float")) {
DEBUG(errs() << "still has -use-soft-float\n");
}
- F.addAttributes(AttributeSet::FunctionIndex, A);
+ F.addAttributes(AttributeList::FunctionIndex, A);
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 021fb8678686..52bf690a8083 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -766,6 +766,7 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIM16Alu> {
let hasDelaySlot = 1;
let isTerminator=1;
let isBarrier=1;
+ let isReturn=1;
}
def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> {
@@ -773,6 +774,7 @@ def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> {
let isIndirectBranch = 1;
let isTerminator=1;
let isBarrier=1;
+ let isReturn=1;
}
def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIM16Alu> {
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 1b4d73b79895..3272319ad50f 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -917,6 +917,12 @@ def : MipsInstAlias<"jrc $rs", (JIC GPR32Opnd:$rs, 0), 1>, ISA_MIPS32R6, GPR_32;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalrc $rs", (JIALC GPR32Opnd:$rs, 0), 1>, ISA_MIPS32R6, GPR_32;
}
+
+def : MipsInstAlias<"div $rs, $rt", (DIV GPR32Opnd:$rs, GPR32Opnd:$rs,
+ GPR32Opnd:$rt)>, ISA_MIPS32R6;
+def : MipsInstAlias<"divu $rs, $rt", (DIVU GPR32Opnd:$rs, GPR32Opnd:$rs,
+ GPR32Opnd:$rt)>, ISA_MIPS32R6;
+
//===----------------------------------------------------------------------===//
//
// Patterns and Pseudo Instructions
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 521e22fb7992..99025fe1341d 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -326,6 +326,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
EXT_FM<5>, ISA_MIPS64R2;
}
+let isCodeGenOnly = 1, AdditionalPredicates = [NotInMicroMips] in {
+ def DEXT64_32 : InstSE<(outs GPR64Opnd:$rt),
+ (ins GPR32Opnd:$rs, uimm5_report_uimm6:$pos,
+ uimm5_plus1:$size),
+ "dext $rt, $rs, $pos, $size", [], II_EXT, FrmR, "dext">,
+ EXT_FM<3>, ISA_MIPS64R2;
+}
+
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
def DSLL64_32 : FR<0x00, 0x3c, (outs GPR64:$rd), (ins GPR32:$rt),
"dsll\t$rd, $rt, 32", [], II_DSLL>;
@@ -356,11 +364,11 @@ class Count1s<string opstr, RegisterOperand RO>:
let TwoOperandAliasConstraint = "$rd = $rs";
}
-class ExtsCins<string opstr, InstrItinClass itin,
- SDPatternOperator Op = null_frag>:
- InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, uimm5:$pos, uimm5:$lenm1),
- !strconcat(opstr, " $rt, $rs, $pos, $lenm1"),
- [(set GPR64Opnd:$rt, (Op GPR64Opnd:$rs, imm:$pos, imm:$lenm1))],
+class ExtsCins<string opstr, InstrItinClass itin, RegisterOperand RO,
+ PatFrag PosImm, SDPatternOperator Op = null_frag>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm5:$pos, uimm5:$lenm1),
+ !strconcat(opstr, "\t$rt, $rs, $pos, $lenm1"),
+ [(set RO:$rt, (Op RO:$rs, PosImm:$pos, imm:$lenm1))],
itin, FrmR, opstr> {
let TwoOperandAliasConstraint = "$rt = $rs";
}
@@ -424,13 +432,28 @@ def DMUL : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>,
let Defs = [HI0, LO0, P0, P1, P2];
}
-// Extract a signed bit field /+32
-def EXTS : ExtsCins<"exts", II_EXT>, EXTS_FM<0x3a>, ASE_CNMIPS;
-def EXTS32: ExtsCins<"exts32", II_EXT>, EXTS_FM<0x3b>, ASE_CNMIPS;
-
-// Clear and insert a bit field /+32
-def CINS : ExtsCins<"cins", II_INS>, EXTS_FM<0x32>, ASE_CNMIPS;
-def CINS32: ExtsCins<"cins32", II_INS>, EXTS_FM<0x33>, ASE_CNMIPS;
+let AdditionalPredicates = [NotInMicroMips] in {
+ // Extract a signed bit field /+32
+ def EXTS : ExtsCins<"exts", II_EXT, GPR64Opnd, immZExt5>, EXTS_FM<0x3a>,
+ ASE_MIPS64_CNMIPS;
+ def EXTS32: ExtsCins<"exts32", II_EXT, GPR64Opnd, immZExt5Plus32>,
+ EXTS_FM<0x3b>, ASE_MIPS64_CNMIPS;
+
+ // Clear and insert a bit field /+32
+ def CINS : ExtsCins<"cins", II_INS, GPR64Opnd, immZExt5, MipsCIns>,
+ EXTS_FM<0x32>, ASE_MIPS64_CNMIPS;
+ def CINS32: ExtsCins<"cins32", II_INS, GPR64Opnd, immZExt5Plus32, MipsCIns>,
+ EXTS_FM<0x33>, ASE_MIPS64_CNMIPS;
+ let isCodeGenOnly = 1 in {
+ def CINS_i32 : ExtsCins<"cins", II_INS, GPR32Opnd, immZExt5, MipsCIns>,
+ EXTS_FM<0x32>, ASE_MIPS64_CNMIPS;
+ def CINS64_32 :InstSE<(outs GPR64Opnd:$rt),
+ (ins GPR32Opnd:$rs, uimm5:$pos, uimm5:$lenm1),
+ "cins\t$rt, $rs, $pos, $lenm1", [], II_INS, FrmR,
+ "cins">,
+ EXTS_FM<0x32>, ASE_MIPS64_CNMIPS;
+ }
+}
// Move to multiplier/product register
def MTM0 : MoveToLOHI<"mtm0", GPR64Opnd, [MPL0, P0, P1, P2]>, MTMR_FM<0x08>,
@@ -513,41 +536,87 @@ def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
// hi/lo relocs
-def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
-def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
-def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
-def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
-def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
-def : MipsPat<(MipsHi texternalsym:$in), (LUi64 texternalsym:$in)>;
+let AdditionalPredicates = [NotInMicroMips] in
+defm : MipsHiLoRelocs<LUi64, DADDiu, ZERO_64, GPR64Opnd>, SYM_32;
+
+def : MipsPat<(MipsGotHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : MipsPat<(MipsGotHi texternalsym:$in), (LUi64 texternalsym:$in)>;
+
+multiclass MipsHighestHigherHiLoRelocs<Instruction Lui, Instruction Daddiu> {
+ def : MipsPat<(MipsJmpLink (i64 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+ def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)),
+ (Lui tglobaladdr:$in)>;
+ def : MipsPat<(MipsHighest (i64 tblockaddress:$in)),
+ (Lui tblockaddress:$in)>;
+ def : MipsPat<(MipsHighest (i64 tjumptable:$in)),
+ (Lui tjumptable:$in)>;
+ def : MipsPat<(MipsHighest (i64 tconstpool:$in)),
+ (Lui tconstpool:$in)>;
+ def : MipsPat<(MipsHighest (i64 tglobaltlsaddr:$in)),
+ (Lui tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsHighest (i64 texternalsym:$in)),
+ (Lui texternalsym:$in)>;
+
+ def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)),
+ (Daddiu ZERO_64, tglobaladdr:$in)>;
+ def : MipsPat<(MipsHigher (i64 tblockaddress:$in)),
+ (Daddiu ZERO_64, tblockaddress:$in)>;
+ def : MipsPat<(MipsHigher (i64 tjumptable:$in)),
+ (Daddiu ZERO_64, tjumptable:$in)>;
+ def : MipsPat<(MipsHigher (i64 tconstpool:$in)),
+ (Daddiu ZERO_64, tconstpool:$in)>;
+ def : MipsPat<(MipsHigher (i64 tglobaltlsaddr:$in)),
+ (Daddiu ZERO_64, tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsHigher (i64 texternalsym:$in)),
+ (Daddiu ZERO_64, texternalsym:$in)>;
+
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tblockaddress:$lo))),
+ (Daddiu GPR64:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tjumptable:$lo))),
+ (Daddiu GPR64:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tconstpool:$lo))),
+ (Daddiu GPR64:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaltlsaddr:$lo))),
+ (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>;
+
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tblockaddress:$lo))),
+ (Daddiu GPR64:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tjumptable:$lo))),
+ (Daddiu GPR64:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tconstpool:$lo))),
+ (Daddiu GPR64:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaltlsaddr:$lo))),
+ (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>;
+
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tblockaddress:$lo))),
+ (Daddiu GPR64:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tjumptable:$lo))),
+ (Daddiu GPR64:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tconstpool:$lo))),
+ (Daddiu GPR64:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaltlsaddr:$lo))),
+ (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>;
+
+}
+
+// highest/higher/hi/lo relocs
+let AdditionalPredicates = [NotInMicroMips] in
+defm : MipsHighestHigherHiLoRelocs<LUi64, DADDiu>, SYM_64;
+
+def : WrapperPat<tglobaladdr, DADDiu, GPR64>;
+def : WrapperPat<tconstpool, DADDiu, GPR64>;
+def : WrapperPat<texternalsym, DADDiu, GPR64>;
+def : WrapperPat<tblockaddress, DADDiu, GPR64>;
+def : WrapperPat<tjumptable, DADDiu, GPR64>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, GPR64>;
-let AdditionalPredicates = [NotInMicroMips] in {
- def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
- def : MipsPat<(MipsLo tblockaddress:$in),
- (DADDiu ZERO_64, tblockaddress:$in)>;
- def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
- def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
- def : MipsPat<(MipsLo tglobaltlsaddr:$in),
- (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
- def : MipsPat<(MipsLo texternalsym:$in), (DADDiu ZERO_64, texternalsym:$in)>;
-
- def : MipsPat<(add GPR64:$hi, (MipsLo tglobaladdr:$lo)),
- (DADDiu GPR64:$hi, tglobaladdr:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tblockaddress:$lo)),
- (DADDiu GPR64:$hi, tblockaddress:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tjumptable:$lo)),
- (DADDiu GPR64:$hi, tjumptable:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tconstpool:$lo)),
- (DADDiu GPR64:$hi, tconstpool:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>;
-
- def : WrapperPat<tglobaladdr, DADDiu, GPR64>;
- def : WrapperPat<tconstpool, DADDiu, GPR64>;
- def : WrapperPat<texternalsym, DADDiu, GPR64>;
- def : WrapperPat<tblockaddress, DADDiu, GPR64>;
- def : WrapperPat<tjumptable, DADDiu, GPR64>;
- def : WrapperPat<tglobaltlsaddr, DADDiu, GPR64>;
-}
defm : BrcondPats<GPR64, BEQ64, BEQ, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
@@ -600,6 +669,14 @@ def : MipsPat<(i64 (anyext GPR32:$src)),
def : MipsPat<(i64 (zext GPR32:$src)), (DSRL (DSLL64_32 GPR32:$src), 32)>;
def : MipsPat<(i64 (sext GPR32:$src)), (SLL64_32 GPR32:$src)>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsPat<(i64 (zext GPR32:$src)), (DEXT64_32 GPR32:$src, 0, 32)>,
+ ISA_MIPS64R2;
+ def : MipsPat<(i64 (zext (i32 (shl GPR32:$rt, immZExt5:$imm)))),
+ (CINS64_32 GPR32:$rt, imm:$imm, (immZExt5To31 imm:$imm))>,
+ ASE_MIPS64_CNMIPS;
+}
+
// Sign extend in register
def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)),
(SLL64_64 GPR64:$src)>;
@@ -661,6 +738,15 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"daddu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
0>, ISA_MIPS3;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"and", ANDi64, GPR64Opnd, imm64>,
+ GPR_64;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"or", ORi64, GPR64Opnd, imm64>,
+ GPR_64;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi64, GPR64Opnd, imm64>,
+ GPR_64;
}
def : MipsInstAlias<"dsll $rd, $rt, $rs",
(DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
@@ -741,21 +827,21 @@ def : MipsInstAlias<"bbit1 $rs, $p, $offset",
def : MipsInstAlias<"exts $rt, $rs, $pos, $lenm1",
(EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
def : MipsInstAlias<"exts $rt, $pos, $lenm1",
(EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
 // cins with $pos 32-63 is converted to cins32 with $pos 0-31
def : MipsInstAlias<"cins $rt, $rs, $pos, $lenm1",
(CINS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
def : MipsInstAlias<"cins $rt, $pos, $lenm1",
(CINS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
@@ -770,3 +856,81 @@ def LoadAddrReg64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins mem:$addr),
"dla\t$rt, $addr">;
def LoadAddrImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins imm64:$imm64),
"dla\t$rt, $imm64">;
+
+def DMULImmMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ simm32_relaxed:$imm),
+ "dmul\t$rs, $rt, $imm">,
+ ISA_MIPS3_NOT_32R6_64R6;
+def DMULOMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ GPR64Opnd:$rd),
+ "dmulo\t$rs, $rt, $rd">,
+ ISA_MIPS3_NOT_32R6_64R6;
+def DMULOUMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ GPR64Opnd:$rd),
+ "dmulou\t$rs, $rt, $rd">,
+ ISA_MIPS3_NOT_32R6_64R6;
+
+def DMULMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ GPR64Opnd:$rd),
+ "dmul\t$rs, $rt, $rd"> {
+ let InsnPredicates = [HasMips3, NotMips64r6, NotCnMips];
+}
+
+let AdditionalPredicates = [NotInMicroMips] in {
+ def DSDivMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+ "ddiv\t$rd, $rs, $rt">,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def DSDivIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "ddiv\t$rd, $rs, $imm">,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def DUDivMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+ "ddivu\t$rd, $rs, $rt">,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def DUDivIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "ddivu\t$rd, $rs, $imm">,
+ ISA_MIPS3_NOT_32R6_64R6;
+
+  // GAS expands 'div' and 'ddiv' differently when the destination
+  // register is $zero and the instruction is in the two-operand
+  // form: 'ddiv' gets expanded, while 'div' does not.
+
+ def : MipsInstAlias<"ddiv $rs, $rt", (DSDivMacro GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ GPR64Opnd:$rt), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def : MipsInstAlias<"ddiv $rd, $imm", (DSDivIMacro GPR64Opnd:$rd,
+ GPR64Opnd:$rd,
+ imm64:$imm), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+
+  // GAS expands 'divu' and 'ddivu' differently when the destination
+  // register is $zero and the instruction is in the two-operand
+  // form: 'ddivu' gets expanded, while 'divu' does not.
+
+ def : MipsInstAlias<"ddivu $rt, $rs", (DUDivMacro GPR64Opnd:$rt,
+ GPR64Opnd:$rt,
+ GPR64Opnd:$rs), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def : MipsInstAlias<"ddivu $rd, $imm", (DUDivIMacro GPR64Opnd:$rd,
+ GPR64Opnd:$rd,
+ imm64:$imm), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+}
+
+def NORImm64 : NORIMM_DESC_BASE<GPR64Opnd, imm64>, GPR_64;
+def : MipsInstAlias<"nor\t$rs, $imm", (NORImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
+ imm64:$imm)>, GPR_64;
+def SLTImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs),
+ (ins GPR64Opnd:$rt, imm64:$imm),
+ "slt\t$rs, $rt, $imm">, GPR_64;
+def : MipsInstAlias<"slt\t$rs, $imm", (SLTImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
+ imm64:$imm)>, GPR_64;
+def SLTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs),
+ (ins GPR64Opnd:$rt, imm64:$imm),
+ "sltu\t$rs, $rt, $imm">, GPR_64;
+def : MipsInstAlias<"sltu\t$rs, $imm", (SLTUImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
+ imm64:$imm)>, GPR_64;
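
The MipsHighestHigherHiLoRelocs patterns above materialize a 64-bit symbol address with the lui/daddiu/dsll sequence, one 16-bit relocation piece at a time. A sketch of how the four pieces relate, assuming the standard GNU as semantics for %lo/%hi/%higher/%highest; the rounding constants fold in the carry produced by sign-extending the lower pieces:

#include <cstdint>

// Sketch of the relocation-operator arithmetic: each piece is 16 bits, and
// the additions account for the carry from the sign-extended lower parts.
uint16_t lo(uint64_t A)      { return static_cast<uint16_t>(A); }
uint16_t hi(uint64_t A)      { return static_cast<uint16_t>((A + 0x8000u) >> 16); }
uint16_t higher(uint64_t A)  { return static_cast<uint16_t>((A + 0x80008000ULL) >> 32); }
uint16_t highest(uint64_t A) { return static_cast<uint16_t>((A + 0x800080008000ULL) >> 48); }
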
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 04d6529a073d..2a9d96205eb9 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -79,6 +80,9 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
NaClAlignIndirectJumpTargets(MF);
AsmPrinter::runOnMachineFunction(MF);
+
+ EmitXRayTable();
+
return true;
}
@@ -132,6 +136,7 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MipsTargetStreamer &TS = getTargetStreamer();
+ unsigned Opc = MI->getOpcode();
TS.forbidModuleDirective();
if (MI->isDebugValue()) {
@@ -143,20 +148,20 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// If we just ended a constant pool, mark it as such.
- if (InConstantPool && MI->getOpcode() != Mips::CONSTPOOL_ENTRY) {
+ if (InConstantPool && Opc != Mips::CONSTPOOL_ENTRY) {
OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
InConstantPool = false;
}
- if (MI->getOpcode() == Mips::CONSTPOOL_ENTRY) {
+ if (Opc == Mips::CONSTPOOL_ENTRY) {
// CONSTPOOL_ENTRY - This instruction represents a floating
- //constant pool in the function. The first operand is the ID#
+ // constant pool in the function. The first operand is the ID#
// for this instruction, the second is the index into the
// MachineConstantPool that this is, the third is the size in
// bytes of this constant pool entry.
// The required alignment is specified on the basic block holding this MI.
//
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
- unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
// If this is the first entry of the pool, mark it.
if (!InConstantPool) {
@@ -174,6 +179,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ switch (Opc) {
+ case Mips::PATCHABLE_FUNCTION_ENTER:
+ LowerPATCHABLE_FUNCTION_ENTER(*MI);
+ return;
+ case Mips::PATCHABLE_FUNCTION_EXIT:
+ LowerPATCHABLE_FUNCTION_EXIT(*MI);
+ return;
+ case Mips::PATCHABLE_TAIL_CALL:
+ LowerPATCHABLE_TAIL_CALL(*MI);
+ return;
+ }
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -574,6 +590,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MipsII::MO_GOT: O << "%got("; break;
case MipsII::MO_ABS_HI: O << "%hi("; break;
case MipsII::MO_ABS_LO: O << "%lo("; break;
+ case MipsII::MO_HIGHER: O << "%higher("; break;
+    case MipsII::MO_HIGHEST: O << "%highest("; break;
case MipsII::MO_TLSGD: O << "%tlsgd("; break;
case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
@@ -698,7 +716,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Ideally it should test for properties of the ABI and not the ABI
// itself.
// For the moment, I'm only correcting enough to make MIPS-IV work.
- if (!isPositionIndependent() && !ABI.IsN64())
+ if (!isPositionIndependent() && STI.hasSym32())
TS.emitDirectiveOptionPic0();
}
@@ -1032,6 +1050,149 @@ void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
}
+void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
+ const uint8_t NoopsInSledCount = Subtarget->isGP64bit() ? 15 : 11;
+ // For mips32 we want to emit the following pattern:
+ //
+ // .Lxray_sled_N:
+ // ALIGN
+ // B .tmpN
+ // 11 NOP instructions (44 bytes)
+ // ADDIU T9, T9, 52
+ // .tmpN
+ //
+ // We need the 44 bytes (11 instructions) because at runtime, we'd
+ // be patching over the full 48 bytes (12 instructions) with the following
+ // pattern:
+ //
+ // ADDIU SP, SP, -8
+ // NOP
+ // SW RA, 4(SP)
+ // SW T9, 0(SP)
+ // LUI T9, %hi(__xray_FunctionEntry/Exit)
+ // ORI T9, T9, %lo(__xray_FunctionEntry/Exit)
+ // LUI T0, %hi(function_id)
+ // JALR T9
+ // ORI T0, T0, %lo(function_id)
+ // LW T9, 0(SP)
+ // LW RA, 4(SP)
+ // ADDIU SP, SP, 8
+ //
+  // We add 52 bytes to t9 because we want to adjust the function pointer to
+  // the actual start of the function, i.e. the address just after the nop
+  // sled. We do this because the gp displacement relocation is emitted at the
+  // start of the function, i.e. after the nop sled, and to correctly
+  // calculate the global offset table address, t9 must hold the address of
+  // the instruction containing the gp displacement relocation.
+ // FIXME: Is this correct for the static relocation model?
+ //
+ // For mips64 we want to emit the following pattern:
+ //
+ // .Lxray_sled_N:
+ // ALIGN
+ // B .tmpN
+ // 15 NOP instructions (60 bytes)
+ // .tmpN
+ //
+ // We need the 60 bytes (15 instructions) because at runtime, we'd
+ // be patching over the full 64 bytes (16 instructions) with the following
+ // pattern:
+ //
+ // DADDIU SP, SP, -16
+ // NOP
+ // SD RA, 8(SP)
+ // SD T9, 0(SP)
+ // LUI T9, %highest(__xray_FunctionEntry/Exit)
+ // ORI T9, T9, %higher(__xray_FunctionEntry/Exit)
+ // DSLL T9, T9, 16
+ // ORI T9, T9, %hi(__xray_FunctionEntry/Exit)
+ // DSLL T9, T9, 16
+ // ORI T9, T9, %lo(__xray_FunctionEntry/Exit)
+ // LUI T0, %hi(function_id)
+ // JALR T9
+ // ADDIU T0, T0, %lo(function_id)
+ // LD T9, 0(SP)
+ // LD RA, 8(SP)
+ // DADDIU SP, SP, 16
+ //
+ OutStreamer->EmitCodeAlignment(4);
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+  // Emit a "B .tmpN" instruction, which jumps over the nop sled to the
+  // actual start of the function.
+ const MCExpr *TargetExpr = MCSymbolRefExpr::create(
+ Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Mips::BEQ)
+ .addReg(Mips::ZERO)
+ .addReg(Mips::ZERO)
+ .addExpr(TargetExpr));
+
+ for (int8_t I = 0; I < NoopsInSledCount; I++)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Mips::SLL)
+ .addReg(Mips::ZERO)
+ .addReg(Mips::ZERO)
+ .addImm(0));
+
+ OutStreamer->EmitLabel(Target);
+
+ if (!Subtarget->isGP64bit()) {
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Mips::ADDiu)
+ .addReg(Mips::T9)
+ .addReg(Mips::T9)
+ .addImm(0x34));
+ }
+
+ recordSled(CurSled, MI, Kind);
+}
+
+void MipsAsmPrinter::EmitXRayTable() {
+ if (Sleds.empty())
+ return;
+ if (Subtarget->isTargetELF()) {
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ auto Fn = MF->getFunction();
+ MCSection *Section;
+
+ if (Fn->hasComdat())
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+ Fn->getComdat()->getName());
+ else
+ Section =
+ OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC, 0, CurrentFnSym->getName());
+
+ OutStreamer->SwitchSection(Section);
+ for (const auto &Sled : Sleds) {
+ OutStreamer->EmitSymbolValue(Sled.Sled, Subtarget->isGP64bit() ? 8 : 4);
+ OutStreamer->EmitSymbolValue(CurrentFnSym, Subtarget->isGP64bit() ? 8 : 4);
+ auto Kind = static_cast<uint8_t>(Sled.Kind);
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Kind), 1));
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+ OutStreamer->EmitZeros(Subtarget->isGP64bit() ? 14 : 6);
+ }
+ OutStreamer->SwitchSection(PrevSection);
+ }
+ Sleds.clear();
+}
+
+void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void MipsAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::TAIL_CALL);
+}
+
void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
raw_ostream &OS) {
// TODO: implement
@@ -1039,7 +1200,7 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Emit .dtprelword or .dtpreldword directive
// and value for debug thread local expression.
-void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value,
+void MipsAsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
unsigned Size) const {
switch (Size) {
case 4:
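
EmitXRayTable above writes one record per sled: the sled label, the containing function symbol, a kind byte, an always-instrument byte, and zero padding to a fixed record size. Roughly, assuming the sizes used by the emission code (this struct is only an illustration, not a shared header):

#include <cstdint>

// 64-bit layout: 8 + 8 + 1 + 1 + 14 = 32 bytes per sled
// (the 32-bit layout is 4 + 4 + 1 + 1 + 6 = 16 bytes).
struct XRaySledEntry64 {
  uint64_t SledAddress;      // label emitted at the start of the sled
  uint64_t FunctionAddress;  // symbol of the containing function
  uint8_t  Kind;             // FUNCTION_ENTER / FUNCTION_EXIT / TAIL_CALL
  uint8_t  AlwaysInstrument; // whether instrumentation is unconditional
  uint8_t  Padding[14];      // zero-filled up to the 32-byte record size
};
static_assert(sizeof(XRaySledEntry64) == 32, "one sled record is 32 bytes");
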
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index c5cf5241c236..4699e1b0bd3b 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -35,7 +35,21 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+ //===------------------------------------------------------------------===//
+ // XRay implementation
+ //===------------------------------------------------------------------===//
+public:
+ // XRay-specific lowering for Mips.
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ // Helper function that emits the XRay sleds we've collected for a particular
+ // function.
+ void EmitXRayTable();
+
private:
+ void EmitSled(const MachineInstr &MI, SledKind Kind);
+
// tblgen'erated function.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
@@ -140,7 +154,7 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
+ void EmitDebugThreadLocal(const MCExpr *Value, unsigned Size) const override;
};
}
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 08b8ed31ccbb..026f66a1c0e1 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
//
-//
 // This pass is used to make PC-relative loads of constants.
// For now, only Mips16 will use this.
//
@@ -19,30 +18,43 @@
// This can be particularly helpful in static relocation mode for embedded
// non-linux targets.
//
-//
+//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips16InstrInfo.h"
#include "MipsMachineFunction.h"
-#include "MipsTargetMachine.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <vector>
using namespace llvm;
@@ -58,7 +70,6 @@ static cl::opt<bool>
AlignConstantIslands("mips-align-constant-islands", cl::Hidden, cl::init(true),
cl::desc("Align constant islands in code"));
-
 // Rather than run 'make check' tests with huge amounts of code, we force
 // the test to use this amount.
//
@@ -178,7 +189,6 @@ static unsigned int branchMaxOffsets(unsigned int Opcode) {
namespace {
-
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
@@ -195,7 +205,6 @@ namespace {
/// tracks a list of users.
class MipsConstantIslands : public MachineFunctionPass {
-
/// BasicBlockInfo - Information about the offset and size of a single
/// basic block.
struct BasicBlockInfo {
@@ -208,14 +217,16 @@ namespace {
///
/// Because worst case padding is used, the computed offset of an aligned
/// block may not actually be aligned.
- unsigned Offset;
+ unsigned Offset = 0;
/// Size - Size of the basic block in bytes. If the block contains
/// inline assembly, this is a worst case estimate.
///
/// The size does not include any alignment padding whether from the
/// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
+ unsigned Size = 0;
+
+ BasicBlockInfo() = default;
// FIXME: ignore LogAlign for this patch
//
@@ -223,9 +234,6 @@ namespace {
unsigned PO = Offset + Size;
return PO;
}
-
- BasicBlockInfo() : Offset(0), Size(0) {}
-
};
std::vector<BasicBlockInfo> BBInfo;
@@ -257,13 +265,16 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
+
private:
unsigned MaxDisp;
unsigned LongFormMaxDisp; // mips16 has 16/32 bit instructions
// with different displacements
unsigned LongFormOpcode;
+
public:
bool NegOk;
+
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg,
unsigned longformmaxdisp, unsigned longformopcode)
@@ -272,18 +283,22 @@ namespace {
NegOk(neg){
HighWaterMark = CPEMI->getParent();
}
+
/// getMaxDisp - Returns the maximum displacement supported by MI.
unsigned getMaxDisp() const {
unsigned xMaxDisp = ConstantIslandsSmallOffset?
ConstantIslandsSmallOffset: MaxDisp;
return xMaxDisp;
}
+
void setMaxDisp(unsigned val) {
MaxDisp = val;
}
+
unsigned getLongFormMaxDisp() const {
return LongFormMaxDisp;
}
+
unsigned getLongFormOpcode() const {
return LongFormOpcode;
}
@@ -300,6 +315,7 @@ namespace {
MachineInstr *CPEMI;
unsigned CPI;
unsigned RefCount;
+
CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
: CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
};
@@ -309,7 +325,7 @@ namespace {
/// existed upon entry to this pass), it keeps a vector of entries.
/// Original elements are cloned as we go along; the clones are
/// put in the vector of the original element, but have distinct CPIs.
- std::vector<std::vector<CPEntry> > CPEntries;
+ std::vector<std::vector<CPEntry>> CPEntries;
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
@@ -320,6 +336,7 @@ namespace {
unsigned MaxDisp : 31;
bool isCond : 1;
int UncondBr;
+
ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
@@ -332,29 +349,27 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- const MipsSubtarget *STI;
+ const MipsSubtarget *STI = nullptr;
const Mips16InstrInfo *TII;
MipsFunctionInfo *MFI;
- MachineFunction *MF;
- MachineConstantPool *MCP;
+ MachineFunction *MF = nullptr;
+ MachineConstantPool *MCP = nullptr;
unsigned PICLabelUId;
- bool PrescannedForConstants;
+ bool PrescannedForConstants = false;
void initPICLabelUId(unsigned UId) {
PICLabelUId = UId;
}
-
unsigned createPICLabelUId() {
return PICLabelUId++;
}
public:
static char ID;
- MipsConstantIslands()
- : MachineFunctionPass(ID), STI(nullptr), MF(nullptr), MCP(nullptr),
- PrescannedForConstants(false) {}
+
+ MipsConstantIslands() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Mips Constant Islands"; }
@@ -403,13 +418,11 @@ namespace {
bool fixupUnconditionalBr(ImmBranch &Br);
void prescanForConstants();
-
- private:
-
};
char MipsConstantIslands::ID = 0;
-} // end of anonymous namespace
+
+} // end anonymous namespace
bool MipsConstantIslands::isOffsetInRange
(unsigned UserOffset, unsigned TrialOffset,
@@ -417,20 +430,17 @@ bool MipsConstantIslands::isOffsetInRange
return isOffsetInRange(UserOffset, TrialOffset,
U.getMaxDisp(), U.NegOk);
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void MipsConstantIslands::dumpBBs() {
- DEBUG({
- for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
- const BasicBlockInfo &BBI = BBInfo[J];
- dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
- << format(" size=%#x\n", BBInfo[J].Size);
- }
- });
-}
-/// Returns a pass that converts branches to long branches.
-FunctionPass *llvm::createMipsConstantIslandPass() {
- return new MipsConstantIslands();
+LLVM_DUMP_METHOD void MipsConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
}
+#endif
bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// The intention is for this to be a mips16 only pass for now
@@ -527,7 +537,6 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
MF->push_back(BB);
-
// MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
@@ -647,7 +656,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
computeBlockSize(&*I);
-
// Compute block offsets.
adjustBBOffsetsAfter(&MF->front());
@@ -737,7 +745,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
if (Opc == Mips::CONSTPOOL_ENTRY)
continue;
-
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = MI.getNumOperands(); op != e; ++op)
if (MI.getOperand(op).isCPI()) {
@@ -784,12 +791,9 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Instructions can only use one CP entry, don't bother scanning the
// rest of the operands.
break;
-
}
-
}
}
-
}
/// computeBlockSize - Compute the size and some alignment information for MBB.
@@ -921,8 +925,6 @@ MipsConstantIslands::splitBlockBeforeInstr(MachineInstr &MI) {
return NewBB;
}
-
-
/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
@@ -1337,7 +1339,6 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
if (result==1) return false;
else if (result==2) return true;
-
// Look for water where we can place this CPE.
MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
@@ -1371,7 +1372,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
- IP = find(WaterList, WaterBB);
+ IP = llvm::find(WaterList, WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1473,9 +1474,7 @@ bool MipsConstantIslands::removeUnusedCPEntries() {
/// specific BB can fit in MI's displacement field.
bool MipsConstantIslands::isBBInRange
(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) {
-
-unsigned PCAdj = 4;
-
+ unsigned PCAdj = 4;
unsigned BrOffset = getOffsetOf(MI) + PCAdj;
unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
@@ -1553,7 +1552,6 @@ MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
return true;
}
-
/// fixupConditionalBr - Fix up a conditional branch whose destination is too
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
@@ -1614,7 +1612,6 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
}
}
-
if (NeedSplit) {
splitBlockBeforeInstr(*MI);
// No need for the branch to the next block. We're adding an unconditional
@@ -1654,7 +1651,6 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
return true;
}
-
void MipsConstantIslands::prescanForConstants() {
unsigned J = 0;
(void)J;
@@ -1667,11 +1663,11 @@ void MipsConstantIslands::prescanForConstants() {
PrescannedForConstants = true;
DEBUG(dbgs() << "constant island constant " << *I << "\n");
J = I->getNumOperands();
- DEBUG(dbgs() << "num operands " << J << "\n");
+ DEBUG(dbgs() << "num operands " << J << "\n");
MachineOperand& Literal = I->getOperand(1);
if (Literal.isImm()) {
int64_t V = Literal.getImm();
- DEBUG(dbgs() << "literal " << V << "\n");
+ DEBUG(dbgs() << "literal " << V << "\n");
Type *Int32Ty =
Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, V);
@@ -1692,3 +1688,8 @@ void MipsConstantIslands::prescanForConstants() {
}
}
}
+
+/// Returns a pass that places Mips constant islands.
+FunctionPass *llvm::createMipsConstantIslandPass() {
+ return new MipsConstantIslands();
+}
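
dumpBBs is now compiled only when dumping is available and marked LLVM_DUMP_METHOD, rather than wrapping its body in DEBUG(). A standalone sketch of the same pattern, with made-up names:

#include <cstdio>

struct BlockInfo { unsigned Offset = 0; unsigned Size = 0; };

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Only present in asserts/dump-enabled builds; call sites stay behind the
// same preprocessor condition and need no DEBUG() wrapper.
void dumpBlocks(const BlockInfo *BBs, unsigned N) {
  for (unsigned J = 0; J != N; ++J)
    std::printf("%08x BB#%u\t size=%#x\n", BBs[J].Offset, J, BBs[J].Size);
}
#endif
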
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index c821084f68cf..ae58c26e145a 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -14,21 +14,39 @@
#include "MCTargetDesc/MipsMCNaCl.h"
#include "Mips.h"
#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <utility>
using namespace llvm;
@@ -84,6 +102,7 @@ static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy(
);
namespace {
+
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap;
@@ -91,6 +110,7 @@ namespace {
class RegDefsUses {
public:
RegDefsUses(const TargetRegisterInfo &TRI);
+
void init(const MachineInstr &MI);
/// This function sets all caller-saved registers in Defs.
@@ -120,18 +140,18 @@ namespace {
/// Base class for inspecting loads and stores.
class InspectMemInstr {
public:
- InspectMemInstr(bool ForbidMemInstr_)
- : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false),
- SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {}
+ InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {}
+ virtual ~InspectMemInstr() = default;
/// Return true if MI cannot be moved to delay slot.
bool hasHazard(const MachineInstr &MI);
- virtual ~InspectMemInstr() {}
-
protected:
/// Flags indicating whether loads or stores have been seen.
- bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore;
+ bool OrigSeenLoad = false;
+ bool OrigSeenStore = false;
+ bool SeenLoad = false;
+ bool SeenStore = false;
/// Memory instructions are not allowed to move to delay slot if this flag
/// is true.
@@ -145,6 +165,7 @@ namespace {
class NoMemInstr : public InspectMemInstr {
public:
NoMemInstr() : InspectMemInstr(true) {}
+
private:
bool hasHazard_(const MachineInstr &MI) override { return true; }
};
@@ -153,6 +174,7 @@ namespace {
class LoadFromStackOrConst : public InspectMemInstr {
public:
LoadFromStackOrConst() : InspectMemInstr(false) {}
+
private:
bool hasHazard_(const MachineInstr &MI) override;
};
@@ -183,7 +205,8 @@ namespace {
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
- bool SeenNoObjLoad, SeenNoObjStore;
+ bool SeenNoObjLoad = false;
+ bool SeenNoObjStore = false;
};
class Filler : public MachineFunctionPass {
@@ -271,8 +294,10 @@ namespace {
static char ID;
};
+
char Filler::ID = 0;
-} // end of anonymous namespace
+
+} // end anonymous namespace
static bool hasUnoccupiedSlot(const MachineInstr *MI) {
return MI->hasDelaySlot() && !MI->isBundledWithSucc();
@@ -458,8 +483,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
}
MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
- : InspectMemInstr(false), MFI(MFI_), DL(DL), SeenNoObjLoad(false),
- SeenNoObjStore(false) {}
+ : InspectMemInstr(false), MFI(MFI_), DL(DL) {}
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
@@ -646,12 +670,6 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
return Changed;
}
-/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
-/// slots in Mips MachineFunctions
-FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
- return new Filler(tm);
-}
-
template<typename IterTy>
bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
RegDefsUses &RegDU, InspectMemInstr& IM, Iter Slot,
@@ -889,3 +907,9 @@ bool Filler::terminateSearch(const MachineInstr &Candidate) const {
Candidate.isPosition() || Candidate.isInlineAsm() ||
Candidate.hasUnmodeledSideEffects());
}
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index a44192f57aa0..c060cf06099d 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1,4 +1,4 @@
-//===-- MipsFastISel.cpp - Mips FastISel implementation --------------------===//
+//===-- MipsFastISel.cpp - Mips FastISel implementation -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,24 +14,62 @@
///
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsABIInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsCCState.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
-#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <new>
#define DEBUG_TYPE "mips-fastisel"
@@ -47,35 +85,40 @@ class MipsFastISel final : public FastISel {
typedef enum { RegBase, FrameIndexBase } BaseKind;
private:
- BaseKind Kind;
+ BaseKind Kind = RegBase;
union {
unsigned Reg;
int FI;
} Base;
- int64_t Offset;
+ int64_t Offset = 0;
- const GlobalValue *GV;
+ const GlobalValue *GV = nullptr;
public:
// Innocuous defaults for our address.
- Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; }
+ Address() { Base.Reg = 0; }
+
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
+
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
Base.Reg = Reg;
}
+
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
}
+
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
@@ -165,14 +208,17 @@ private:
MachineInstrBuilder emitInst(unsigned Opc) {
return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
}
+
MachineInstrBuilder emitInst(unsigned Opc, unsigned DstReg) {
return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
DstReg);
}
+
MachineInstrBuilder emitInstStore(unsigned Opc, unsigned SrcReg,
unsigned MemReg, int64_t MemOffset) {
return emitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
}
+
MachineInstrBuilder emitInstLoad(unsigned Opc, unsigned DstReg,
unsigned MemReg, int64_t MemOffset) {
return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
@@ -198,6 +244,7 @@ private:
bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
+
const MipsABIInfo &getABI() const {
return static_cast<const MipsTargetMachine &>(TM).getABI();
}
@@ -220,7 +267,8 @@ public:
#include "MipsGenFastISel.inc"
};
-} // end anonymous namespace.
+
+} // end anonymous namespace
static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
@@ -414,7 +462,6 @@ unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
}
bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
-
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -432,10 +479,9 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
switch (Opcode) {
default:
break;
- case Instruction::BitCast: {
+ case Instruction::BitCast:
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr);
- }
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
int64_t TmpOffset = Addr.getOffset();
@@ -451,7 +497,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -613,14 +659,12 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg);
break;
}
- case CmpInst::ICMP_UGT: {
+ case CmpInst::ICMP_UGT:
emitInst(Mips::SLTu, ResultReg).addReg(RightReg).addReg(LeftReg);
break;
- }
- case CmpInst::ICMP_ULT: {
+ case CmpInst::ICMP_ULT:
emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg);
break;
- }
case CmpInst::ICMP_UGE: {
unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg);
@@ -633,14 +677,12 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
break;
}
- case CmpInst::ICMP_SGT: {
+ case CmpInst::ICMP_SGT:
emitInst(Mips::SLT, ResultReg).addReg(RightReg).addReg(LeftReg);
break;
- }
- case CmpInst::ICMP_SLT: {
+ case CmpInst::ICMP_SLT:
emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg);
break;
- }
case CmpInst::ICMP_SGE: {
unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg);
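(Illustrative aside, not part of the patch.) The emitCmp cases above rely on slt/sltu producing 0 or 1 and on the "or equal" variants being the strict compare inverted with xori 1. A minimal host-side C++ model of that mapping, with made-up helper names:

#include <cassert>
#include <cstdint>

// 0/1 results, mirroring MIPS slt (signed) and sltu (unsigned).
static uint32_t slt(int32_t a, int32_t b) { return a < b ? 1 : 0; }
static uint32_t sltu(uint32_t a, uint32_t b) { return a < b ? 1 : 0; }

int main() {
  int32_t L = -5, R = 7;
  uint32_t UL = static_cast<uint32_t>(L), UR = static_cast<uint32_t>(R);
  // ICMP_SGT: slt Res, R, L
  assert(slt(R, L) == static_cast<uint32_t>(L > R));
  // ICMP_SGE: slt Tmp, L, R; xori Res, Tmp, 1
  assert((slt(L, R) ^ 1u) == static_cast<uint32_t>(L >= R));
  // ICMP_ULT: sltu Res, L, R
  assert(sltu(UL, UR) == static_cast<uint32_t>(UL < UR));
  // ICMP_UGE: sltu Tmp, L, R; xori Res, Tmp, 1
  assert((sltu(UL, UR) ^ 1u) == static_cast<uint32_t>(UL >= UR));
  return 0;
}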
@@ -709,6 +751,7 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
}
return true;
}
+
bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment) {
//
@@ -716,35 +759,30 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
//
unsigned Opc;
switch (VT.SimpleTy) {
- case MVT::i32: {
+ case MVT::i32:
ResultReg = createResultReg(&Mips::GPR32RegClass);
Opc = Mips::LW;
break;
- }
- case MVT::i16: {
+ case MVT::i16:
ResultReg = createResultReg(&Mips::GPR32RegClass);
Opc = Mips::LHu;
break;
- }
- case MVT::i8: {
+ case MVT::i8:
ResultReg = createResultReg(&Mips::GPR32RegClass);
Opc = Mips::LBu;
break;
- }
- case MVT::f32: {
+ case MVT::f32:
if (UnsupportedFPMode)
return false;
ResultReg = createResultReg(&Mips::FGR32RegClass);
Opc = Mips::LWC1;
break;
- }
- case MVT::f64: {
+ case MVT::f64:
if (UnsupportedFPMode)
return false;
ResultReg = createResultReg(&Mips::AFGR64RegClass);
Opc = Mips::LDC1;
break;
- }
default:
return false;
}
@@ -1730,6 +1768,7 @@ bool MipsFastISel::selectTrunc(const Instruction *I) {
updateValueMap(I, SrcReg);
return true;
}
+
bool MipsFastISel::selectIntExt(const Instruction *I) {
Type *DestTy = I->getType();
Value *Src = I->getOperand(0);
@@ -1757,6 +1796,7 @@ bool MipsFastISel::selectIntExt(const Instruction *I) {
updateValueMap(I, ResultReg);
return true;
}
+
bool MipsFastISel::emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg) {
unsigned ShiftAmt;
@@ -2074,8 +2114,10 @@ unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
}
namespace llvm {
+
FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
return new MipsFastISel(funcInfo, libInfo);
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/Mips/MipsHazardSchedule.cpp b/lib/Target/Mips/MipsHazardSchedule.cpp
index 31b86124bc8d..f6fcf6ec9385 100644
--- a/lib/Target/Mips/MipsHazardSchedule.cpp
+++ b/lib/Target/Mips/MipsHazardSchedule.cpp
@@ -36,7 +36,7 @@
///
/// A) A previous pass has created a compact branch directly.
/// B) Transforming a delay slot branch into compact branch. This case can be
-/// difficult to process as lookahead for hazards is insufficent, as
+/// difficult to process as lookahead for hazards is insufficient, as
/// backwards delay slot filling can also produce hazards in previously
/// processed instructions.
///
@@ -103,23 +103,24 @@ static Iter getNextMachineInstrInBB(Iter Position) {
// Find the next real instruction from the current position, looking through
// basic block boundaries.
-static Iter getNextMachineInstr(Iter Position, MachineBasicBlock *Parent) {
+static std::pair<Iter, bool> getNextMachineInstr(Iter Position,
+                                                 MachineBasicBlock *Parent) {
if (Position == Parent->end()) {
- MachineBasicBlock *Succ = Parent->getNextNode();
- if (Succ != nullptr && Parent->isSuccessor(Succ)) {
- Position = Succ->begin();
- Parent = Succ;
- } else {
- llvm_unreachable(
- "Should have identified the end of the function earlier!");
- }
+ do {
+ MachineBasicBlock *Succ = Parent->getNextNode();
+ if (Succ != nullptr && Parent->isSuccessor(Succ)) {
+ Position = Succ->begin();
+ Parent = Succ;
+ } else {
+ return std::make_pair(Position, true);
+ }
+ } while (Parent->empty());
}
Iter Instr = getNextMachineInstrInBB(Position);
if (Instr == Parent->end()) {
return getNextMachineInstr(Instr, Parent);
}
- return Instr;
+ return std::make_pair(Instr, false);
}
bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) {
@@ -145,7 +146,9 @@ bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) {
bool LastInstInFunction =
std::next(I) == FI->end() && std::next(FI) == MF.end();
if (!LastInstInFunction) {
- Inst = getNextMachineInstr(std::next(I), &*FI);
+ std::pair<Iter, bool> Res = getNextMachineInstr(std::next(I), &*FI);
+ LastInstInFunction |= Res.second;
+ Inst = Res.first;
}
if (LastInstInFunction || !TII->SafeInForbiddenSlot(*Inst)) {
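(Illustrative aside, not part of the patch.) The getNextMachineInstr rewrite above turns an assert into a recoverable signal: the walk now skips empty successor blocks and reports "ran off the end of the function" through the bool half of the returned pair, which runOnMachineFunction folds into LastInstInFunction. A minimal sketch of the same control-flow shape, with hypothetical names:

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

using Block = std::vector<int>;
using Function = std::vector<Block>;

// Returns {{blockIdx, instrIdx}, reachedEnd}; empty blocks are skipped.
static std::pair<std::pair<size_t, size_t>, bool>
nextInstr(const Function &F, size_t BB, size_t I) {
  while (I == F[BB].size()) {   // at the end of the current block
    if (++BB == F.size())
      return {{BB, I}, true};   // walked off the end of the function
    I = 0;                      // continue at the start of the successor
  }
  return {{BB, I}, false};
}

int main() {
  Function F = {{1, 2}, {}, {3}};  // middle block is empty
  auto R = nextInstr(F, 0, 2);     // start just past the last instr of block 0
  if (!R.second)
    std::printf("next: %d\n", F[R.first.first][R.first.second]);  // prints "next: 3"
  return 0;
}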
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 9c511bd77822..93c5f496ce97 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -112,8 +112,11 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::FIRST_NUMBER: break;
case MipsISD::JmpLink: return "MipsISD::JmpLink";
case MipsISD::TailCall: return "MipsISD::TailCall";
+ case MipsISD::Highest: return "MipsISD::Highest";
+ case MipsISD::Higher: return "MipsISD::Higher";
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
+ case MipsISD::GotHi: return "MipsISD::GotHi";
case MipsISD::GPRel: return "MipsISD::GPRel";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
@@ -144,6 +147,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
case MipsISD::Ins: return "MipsISD::Ins";
+ case MipsISD::CIns: return "MipsISD::CIns";
case MipsISD::LWL: return "MipsISD::LWL";
case MipsISD::LWR: return "MipsISD::LWR";
case MipsISD::SWL: return "MipsISD::SWL";
@@ -425,6 +429,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AssertZext);
+ setTargetDAGCombine(ISD::SHL);
if (ABI.IsO32()) {
// These libcalls are not available in 32-bit.
@@ -699,41 +704,81 @@ static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
- // Pattern match EXT.
- // $dst = and ((sra or srl) $src , pos), (2**size - 1)
- // => ext $dst, $src, size, pos
if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert())
return SDValue();
- SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
- unsigned ShiftRightOpc = ShiftRight.getOpcode();
-
- // Op's first operand must be a shift right.
- if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
- return SDValue();
+ SDValue FirstOperand = N->getOperand(0);
+ unsigned FirstOperandOpc = FirstOperand.getOpcode();
+ SDValue Mask = N->getOperand(1);
+ EVT ValTy = N->getValueType(0);
+ SDLoc DL(N);
- // The second operand of the shift must be an immediate.
+ uint64_t Pos = 0, SMPos, SMSize;
ConstantSDNode *CN;
- if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
- return SDValue();
-
- uint64_t Pos = CN->getZExtValue();
- uint64_t SMPos, SMSize;
+ SDValue NewOperand;
+ unsigned Opc;
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
!isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
return SDValue();
- // Return if the shifted mask does not start at bit 0 or the sum of its size
- // and Pos exceeds the word's size.
- EVT ValTy = N->getValueType(0);
- if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
- return SDValue();
+ if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
+ // Pattern match EXT.
+ // $dst = and ((sra or srl) $src , pos), (2**size - 1)
+ // => ext $dst, $src, pos, size
+
+ // The second operand of the shift must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+    // Return if the shifted mask does not start at bit 0 or the sum of its
+    // size and Pos exceeds the word's size.
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
+ return SDValue();
+
+ Opc = MipsISD::Ext;
+ NewOperand = FirstOperand.getOperand(0);
+ } else if (FirstOperandOpc == ISD::SHL && Subtarget.hasCnMips()) {
+ // Pattern match CINS.
+ // $dst = and (shl $src , pos), mask
+ // => cins $dst, $src, pos, size
+ // mask is a shifted mask with consecutive 1's, pos = shift amount,
+ // size = population count.
+
+ // The second operand of the shift must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ if (SMPos != Pos || Pos >= ValTy.getSizeInBits() || SMSize >= 32 ||
+ Pos + SMSize > ValTy.getSizeInBits())
+ return SDValue();
+
+ NewOperand = FirstOperand.getOperand(0);
+ // SMSize is 'location' (position) in this case, not size.
+ SMSize--;
+ Opc = MipsISD::CIns;
+ } else {
+ // Pattern match EXT.
+ // $dst = and $src, (2**size - 1) , if size > 16
+ // => ext $dst, $src, pos, size , pos = 0
- SDLoc DL(N);
- return DAG.getNode(MipsISD::Ext, DL, ValTy,
- ShiftRight.getOperand(0),
+ // If the mask is <= 0xffff, andi can be used instead.
+ if (CN->getZExtValue() <= 0xffff)
+ return SDValue();
+
+ // Return if the mask doesn't start at position 0.
+ if (SMPos)
+ return SDValue();
+
+ Opc = MipsISD::Ext;
+ NewOperand = FirstOperand;
+ }
+ return DAG.getNode(Opc, DL, ValTy, NewOperand,
DAG.getConstant(Pos, DL, MVT::i32),
DAG.getConstant(SMSize, DL, MVT::i32));
}
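(Illustrative aside, not part of the patch.) The two transforms above rest on simple bit identities: the EXT form extracts a size-bit field at pos, and the cnMIPS CINS form deposits the low bits of the source at pos, with the instruction encoding size minus one. A small host-side model of both, assuming the usual ext/cins semantics (helper names are made up):

#include <cassert>
#include <cstdint>

// ext  rd, rs, pos, size     : rd = (rs >> pos) & ((1 << size) - 1)
static uint64_t ext(uint64_t x, unsigned pos, unsigned size) {
  return (x >> pos) & ((1ULL << size) - 1);
}

// cins rd, rs, pos, size - 1 : rd = (rs & ((1 << size) - 1)) << pos
static uint64_t cins(uint64_t x, unsigned pos, unsigned sizeM1) {
  return (x & ((1ULL << (sizeM1 + 1)) - 1)) << pos;
}

int main() {
  uint64_t x = 0x123456789abcdef0ULL;
  // (and (srl x, 8), 0xffff)  ==>  ext x, 8, 16
  assert(((x >> 8) & 0xffffULL) == ext(x, 8, 16));
  // (and (shl x, 4), 0xfff0)  ==>  cins x, 4, 11   (mask = 12 ones starting at bit 4)
  assert(((x << 4) & 0xfff0ULL) == cins(x, 4, 11));
  return 0;
}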
@@ -852,6 +897,58 @@ static SDValue performAssertZextCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+
+static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ // Pattern match CINS.
+ // $dst = shl (and $src , imm), pos
+ // => cins $dst, $src, pos, size
+
+ if (DCI.isBeforeLegalizeOps() || !Subtarget.hasCnMips())
+ return SDValue();
+
+ SDValue FirstOperand = N->getOperand(0);
+ unsigned FirstOperandOpc = FirstOperand.getOpcode();
+ SDValue SecondOperand = N->getOperand(1);
+ EVT ValTy = N->getValueType(0);
+ SDLoc DL(N);
+
+ uint64_t Pos = 0, SMPos, SMSize;
+ ConstantSDNode *CN;
+ SDValue NewOperand;
+
+ // The second operand of the shift must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ if (Pos >= ValTy.getSizeInBits())
+ return SDValue();
+
+ if (FirstOperandOpc != ISD::AND)
+ return SDValue();
+
+ // AND's second operand must be a shifted mask.
+ if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
+ !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ return SDValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its size
+ // and Pos exceeds the word's size.
+ if (SMPos != 0 || SMSize > 32 || Pos + SMSize > ValTy.getSizeInBits())
+ return SDValue();
+
+ NewOperand = FirstOperand.getOperand(0);
+ // SMSize is 'location' (position) in this case, not size.
+ SMSize--;
+
+ return DAG.getNode(MipsISD::CIns, DL, ValTy, NewOperand,
+ DAG.getConstant(Pos, DL, MVT::i32),
+ DAG.getConstant(SMSize, DL, MVT::i32));
+}
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -875,6 +972,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performADDCombine(N, DAG, DCI, Subtarget);
case ISD::AssertZext:
return performAssertZextCombine(N, DAG, DCI, Subtarget);
+ case ISD::SHL:
+ return performSHLCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
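(Illustrative aside, not part of the patch.) performSHLCombine handles the commuted form of the same CINS pattern, where the mask is applied before the shift; the rewrite is valid because masking then shifting equals shifting then masking with the shifted mask. A one-assert check of that identity:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0xdeadbeefcafef00dULL;
  unsigned pos = 5, size = 12;            // pos + size <= 64
  uint64_t ones = (1ULL << size) - 1;
  // (shl (and x, ones), pos) == (and (shl x, pos), (shl ones, pos))
  assert(((x & ones) << pos) == ((x << pos) & (ones << pos)));
  return 0;
}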
@@ -1733,7 +1832,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
- if (!isPositionIndependent() && !ABI.IsN64()) {
+ if (!isPositionIndependent()) {
const MipsTargetObjectFile *TLOF =
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
@@ -1742,8 +1841,10 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
- // %hi/%lo relocation
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ // %hi/%lo relocation
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ // %highest/%higher/%hi/%lo relocation
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
}
// Every other architecture would use shouldAssumeDSOLocal in here, but
@@ -1777,8 +1878,9 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
EVT Ty = Op.getValueType();
- if (!isPositionIndependent() && !ABI.IsN64())
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ if (!isPositionIndependent())
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
@@ -1820,8 +1922,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args));
+ CLI.setDebugLoc(DL)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
@@ -1870,8 +1973,9 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
EVT Ty = Op.getValueType();
- if (!isPositionIndependent() && !ABI.IsN64())
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ if (!isPositionIndependent())
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
@@ -1882,7 +1986,7 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
EVT Ty = Op.getValueType();
- if (!isPositionIndependent() && !ABI.IsN64()) {
+ if (!isPositionIndependent()) {
const MipsTargetObjectFile *TLOF =
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
@@ -1892,10 +1996,11 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
}
- return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
+ return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -2796,14 +2901,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- bool IsPICCall = (ABI.IsN64() || IsPIC); // true if calls are translated to
- // jalr $25
+
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
bool GlobalOrExternal = false, IsCallReloc = false;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- if (IsPICCall) {
+ if (IsPIC) {
const GlobalValue *Val = G->getGlobal();
InternalLinkage = Val->hasInternalLinkage();
@@ -2828,7 +2932,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- if (!ABI.IsN64() && !IsPIC) // !N64 && static
+ if (!IsPIC) // static
Callee = DAG.getTargetExternalSymbol(
Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG);
else if (LargeGOT) {
@@ -2836,7 +2940,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Sym));
IsCallReloc = true;
- } else { // N64 || PIC
+ } else { // PIC
Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Sym));
IsCallReloc = true;
@@ -2848,7 +2952,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> Ops(1, Chain);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, InternalLinkage,
IsCallReloc, CLI, Callee, Chain);
if (IsTailCall) {
@@ -3683,7 +3787,9 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
}
unsigned MipsTargetLowering::getJumpTableEncoding() const {
- if (ABI.IsN64())
+
+ // FIXME: For space reasons this should be: EK_GPRel32BlockAddress.
+ if (ABI.IsN64() && isPositionIndependent())
return MachineJumpTableInfo::EK_GPRel64BlockAddress;
return TargetLowering::getJumpTableEncoding();
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index cddf0903ca6a..2dcafd51061a 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -37,14 +37,23 @@ namespace llvm {
// Tail call
TailCall,
- // Get the Higher 16 bits from a 32-bit immediate
+ // Get the Highest (63-48) 16 bits from a 64-bit immediate
+ Highest,
+
+ // Get the Higher (47-32) 16 bits from a 64-bit immediate
+ Higher,
+
+ // Get the High 16 bits from a 32/64-bit immediate
// No relation with Mips Hi register
Hi,
- // Get the Lower 16 bits from a 32-bit immediate
+ // Get the Lower 16 bits from a 32/64-bit immediate
// No relation with Mips Lo register
Lo,
+      // Get the High 16 bits from a 32-bit immediate for accessing the GOT.
+ GotHi,
+
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
@@ -107,6 +116,7 @@ namespace llvm {
Ext,
Ins,
+ CIns,
      // EXTR.W intrinsic nodes.
EXTP,
@@ -297,7 +307,7 @@ namespace llvm {
}
bool isJumpTableRelative() const override {
- return getTargetMachine().isPositionIndependent() || ABI.IsN64();
+ return getTargetMachine().isPositionIndependent();
}
protected:
@@ -344,8 +354,8 @@ namespace llvm {
SelectionDAG &DAG, unsigned HiFlag,
unsigned LoFlag, SDValue Chain,
const MachinePointerInfo &PtrInfo) const {
- SDValue Hi =
- DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag));
+ SDValue Hi = DAG.getNode(MipsISD::GotHi, DL, Ty,
+ getTargetNode(N, Ty, DAG, HiFlag));
Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
getTargetNode(N, Ty, DAG, LoFlag));
@@ -356,6 +366,8 @@ namespace llvm {
// computing a symbol's address in non-PIC mode:
//
// (add %hi(sym), %lo(sym))
+ //
+ // This method covers O32, N32 and N64 in sym32 mode.
template <class NodeTy>
SDValue getAddrNonPIC(NodeTy *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG) const {
@@ -364,7 +376,37 @@ namespace llvm {
return DAG.getNode(ISD::ADD, DL, Ty,
DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
- }
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a symbol's address in non-PIC mode for N64.
+ //
+  // (add (shl (add (shl (add %highest(sym), %higher(sym)), 16), %high(sym)),
+ // 16), %lo(%sym))
+ //
+  // FIXME: This method is not efficient for (micro)MIPS64R6.
+ template <class NodeTy>
+ SDValue getAddrNonPICSym64(NodeTy *N, const SDLoc &DL, EVT Ty,
+ SelectionDAG &DAG) const {
+ SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI);
+ SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);
+
+ SDValue Highest =
+ DAG.getNode(MipsISD::Highest, DL, Ty,
+ getTargetNode(N, Ty, DAG, MipsII::MO_HIGHEST));
+ SDValue Higher = getTargetNode(N, Ty, DAG, MipsII::MO_HIGHER);
+ SDValue HigherPart =
+ DAG.getNode(ISD::ADD, DL, Ty, Highest,
+ DAG.getNode(MipsISD::Higher, DL, Ty, Higher));
+ SDValue Cst = DAG.getConstant(16, DL, MVT::i32);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, Ty, HigherPart, Cst);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, Ty, Shift,
+ DAG.getNode(MipsISD::Hi, DL, Ty, Hi));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, Ty, Add, Cst);
+
+ return DAG.getNode(ISD::ADD, DL, Ty, Shift2,
+ DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+ }
// This method creates the following nodes, which are necessary for
// computing a symbol's address using gp-relative addressing:
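(Illustrative aside, not part of the patch.) The getAddrNonPICSym64 chain above materialises a 64-bit symbol as four 16-bit pieces glued together with two 16-bit shifts, matching the lui / daddiu / dsll 16 / daddiu / dsll 16 / daddiu expansion. Ignoring the carry and sign adjustments that the %highest/%higher/%hi/%lo relocations themselves encode, the recombination is just:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t sym = 0x0123456789abcdefULL;       // sample absolute address
  uint64_t highest = (sym >> 48) & 0xffff;    // %highest(sym)
  uint64_t higher  = (sym >> 32) & 0xffff;    // %higher(sym)
  uint64_t hi      = (sym >> 16) & 0xffff;    // %hi(sym)
  uint64_t lo      =  sym        & 0xffff;    // %lo(sym)

  // lui (highest << 16), add higher, shift by 16, add hi, shift by 16, add lo.
  uint64_t addr = ((((highest << 16) + higher) << 16) + hi) << 16;
  addr += lo;

  assert(addr == sym);  // exact when every piece is taken as a raw unsigned chunk
  return 0;
}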
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 19af1914c819..df62c66b75a3 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -482,7 +482,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB->RemoveOperand(0);
for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
- MIB.addOperand(I->getOperand(J));
+ MIB.add(I->getOperand(J));
}
MIB.addImm(0);
@@ -492,7 +492,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
continue;
- MIB.addOperand(I->getOperand(J));
+ MIB.add(I->getOperand(J));
}
}
@@ -501,3 +501,31 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
return MIB;
}
+
+bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ assert(!MI.isBundle() &&
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MCID.isCommutable())
+ return false;
+
+ switch (MI.getOpcode()) {
+ case Mips::DPADD_U_H:
+ case Mips::DPADD_U_W:
+ case Mips::DPADD_U_D:
+ case Mips::DPADD_S_H:
+ case Mips::DPADD_S_W:
+ case Mips::DPADD_S_D: {
+ // The first operand is both input and output, so it should not commute
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3))
+ return false;
+
+ if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
+ return false;
+ return true;
+ }
+ }
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 347b9187d08c..45d700d8afd6 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -135,6 +135,9 @@ public:
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
MachineBasicBlock::iterator I) const;
+ bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
+
protected:
bool isZeroImm(const MachineOperand &op) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 5bc48336121a..b90077d7807d 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -59,10 +59,20 @@ def MipsTailCall : SDNode<"MipsISD::TailCall", SDT_MipsJmpLink,
// Hi and Lo nodes are used to handle global addresses. Used on
// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
// static model. (nothing to do with Mips Registers Hi and Lo)
+
+// Hi is the odd node out; on MIPS64 it can expand to either daddiu when
+// using static relocations with 64-bit symbols, or lui when using 32-bit
+// symbols.
+def MipsHigher : SDNode<"MipsISD::Higher", SDTIntUnaryOp>;
+def MipsHighest : SDNode<"MipsISD::Highest", SDTIntUnaryOp>;
def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+
def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+// Hi node for accessing the GOT.
+def MipsGotHi : SDNode<"MipsISD::GotHi", SDTIntUnaryOp>;
+
// TlsGd node is used to handle General Dynamic TLS
def MipsTlsGd : SDNode<"MipsISD::TlsGd", SDTIntUnaryOp>;
@@ -128,6 +138,7 @@ def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>;
def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
+def MipsCIns : SDNode<"MipsISD::CIns", SDT_Ext>;
def MipsLWL : SDNode<"MipsISD::LWL", SDTMipsLoadLR,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -205,6 +216,10 @@ def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
def NotCnMips : Predicate<"!Subtarget->hasCnMips()">,
AssemblerPredicate<"!FeatureCnMips">;
+def IsSym32 : Predicate<"Subtarget->HasSym32()">,
+ AssemblerPredicate<"FeatureSym32">;
+def IsSym64 : Predicate<"!Subtarget->HasSym32()">,
+ AssemblerPredicate<"!FeatureSym32">;
def RelocNotPIC : Predicate<"!TM.isPositionIndependent()">;
def RelocPIC : Predicate<"TM.isPositionIndependent()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
@@ -237,6 +252,14 @@ class PTR_32 { list<Predicate> PTRPredicates = [IsPTR32bit]; }
class PTR_64 { list<Predicate> PTRPredicates = [IsPTR64bit]; }
//===----------------------------------------------------------------------===//
+// Mips symbol size adjectives.
+// They are mutually exclusive.
+//===----------------------------------------------------------------------===//
+
+class SYM_32 { list<Predicate> SYMPredicates = [IsSym32]; }
+class SYM_64 { list<Predicate> SYMPredicates = [IsSym64]; }
+
+//===----------------------------------------------------------------------===//
// Mips ISA/ASE membership and instruction group membership adjectives.
// They are mutually exclusive.
//===----------------------------------------------------------------------===//
@@ -519,7 +542,7 @@ def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> {
def SImm32RelaxedAsmOperandClass
: SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> {
let Name = "SImm32_Relaxed";
- let PredicateMethod = "isAnyImm<32>";
+ let PredicateMethod = "isAnyImm<33>";
let DiagnosticType = "SImm32_Relaxed";
}
def SImm32AsmOperandClass
@@ -1150,6 +1173,10 @@ def immZExt5Plus33 : PatLeaf<(imm), [{
return isUInt<5>(N->getZExtValue() - 33);
}]>;
+def immZExt5To31 : SDNodeXForm<imm, [{
+ return getImm(N, 31 - N->getZExtValue());
+}]>;
+
// True if (N + 1) fits in 16-bit field.
def immSExt16Plus1 : PatLeaf<(imm), [{
return isInt<17>(N->getSExtValue()) && isInt<16>(N->getSExtValue() + 1);
@@ -2281,9 +2308,38 @@ def SEQIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
def : MipsInstAlias<"seq $rd, $imm",
(SEQIMacro GPR32Opnd:$rd, GPR32Opnd:$rd, simm32:$imm), 0>,
NOT_ASE_CNMIPS;
+
+def MULImmMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rd, GPR32Opnd:$rs,
+ simm32_relaxed:$imm),
+ "mul\t$rd, $rs, $imm">,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MULOMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rd, GPR32Opnd:$rs,
+ GPR32Opnd:$rt),
+ "mulo\t$rd, $rs, $rt">,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MULOUMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rd, GPR32Opnd:$rs,
+ GPR32Opnd:$rt),
+ "mulou\t$rd, $rs, $rt">,
+ ISA_MIPS1_NOT_32R6_64R6;
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
+
+multiclass OneOrTwoOperandMacroImmediateAlias<string Mnemonic,
+ Instruction Opcode,
+ RegisterOperand RO = GPR32Opnd,
+ Operand Imm = simm32_relaxed> {
+  def : MipsInstAlias<!strconcat(Mnemonic, " $rs, $rt, $imm"),
+ (Opcode RO:$rs,
+ RO:$rt,
+ Imm:$imm), 0>;
+  def : MipsInstAlias<!strconcat(Mnemonic, " $rs, $imm"),
+ (Opcode RO:$rs,
+ RO:$rs,
+ Imm:$imm), 0>;
+}
+
def : MipsInstAlias<"move $dst, $src",
(OR GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>,
GPR_32 {
@@ -2296,26 +2352,7 @@ def : MipsInstAlias<"move $dst, $src",
}
def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>,
ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<
- "addu $rs, $rt, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
-def : MipsInstAlias<
- "addu $rs, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
-def : MipsInstAlias<
- "add $rs, $rt, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>,
- ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<
- "add $rs, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>,
- ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<
- "and $rs, $rt, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
-def : MipsInstAlias<
- "and $rs, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
+
def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
let Predicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
@@ -2343,36 +2380,26 @@ let AdditionalPredicates = [NotInMicroMips] in {
"sgtu $$rs, $rt",
(SLTu GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<
- "slt $rs, $rt, $imm",
- (SLTi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "sltu $rt, $rs, $imm",
- (SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "and $rs, $rt, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "and $rs, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "xor $rs, $rt, $imm",
- (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "xor $rs, $imm",
- (XORi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "or $rs, $rt, $imm",
- (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "or $rs, $imm",
- (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
def : MipsInstAlias<
"not $rt",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rt, ZERO), 0>;
def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"add", ADDi>, ISA_MIPS1_NOT_32R6_64R6;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"addu", ADDiu>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"and", ANDi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"or", ORi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"slt", SLTi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"sltu", SLTiu>, GPR_32;
}
def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, COP0Opnd:$rd, 0), 0>;
def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 COP0Opnd:$rd, GPR32Opnd:$rt, 0), 0>;
@@ -2445,6 +2472,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP 0)>, ISA_MIPS32_NOT_32R6_64R6;
def : MipsInstAlias<"sync",
(SYNC 0), 1>, ISA_MIPS2;
+
+def : MipsInstAlias<"mulo $rs, $rt",
+ (MULOMacro GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"mulou $rs, $rt",
+ (MULOUMacro GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -2472,9 +2507,12 @@ def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
"jal\t$rs"> ;
-def NORImm : MipsAsmPseudoInst<
- (outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm32:$imm),
- "nor\t$rs, $rt, $imm"> ;
+class NORIMM_DESC_BASE<RegisterOperand RO, DAGOperand Imm> :
+ MipsAsmPseudoInst<(outs RO:$rs), (ins RO:$rt, Imm:$imm),
+ "nor\t$rs, $rt, $imm">;
+def NORImm : NORIMM_DESC_BASE<GPR32Opnd, simm32_relaxed>, GPR_32;
+def : MipsInstAlias<"nor\t$rs, $imm", (NORImm GPR32Opnd:$rs, GPR32Opnd:$rs,
+ simm32_relaxed:$imm)>, GPR_32;
let hasDelaySlot = 1, isCTI = 1 in {
def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
@@ -2512,6 +2550,9 @@ class CondBranchImmPseudo<string instr_asm> :
MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, imm64:$imm, brtarget:$offset),
!strconcat(instr_asm, "\t$rs, $imm, $offset")>;
+def BEQLImmMacro : CondBranchImmPseudo<"beql">, ISA_MIPS2_NOT_32R6_64R6;
+def BNELImmMacro : CondBranchImmPseudo<"bnel">, ISA_MIPS2_NOT_32R6_64R6;
+
def BLTImmMacro : CondBranchImmPseudo<"blt">;
def BLEImmMacro : CondBranchImmPseudo<"ble">;
def BGEImmMacro : CondBranchImmPseudo<"bge">;
@@ -2535,34 +2576,46 @@ def BGTULImmMacro : CondBranchImmPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
// Once the tablegen-erated errors are made better, this needs to be fixed and
// predicates need to be restored.
-def SDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+def SDivMacro : MipsAsmPseudoInst<(outs GPR32NonZeroOpnd:$rd),
(ins GPR32Opnd:$rs, GPR32Opnd:$rt),
"div\t$rd, $rs, $rt">,
ISA_MIPS1_NOT_32R6_64R6;
+def SDivIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32:$imm),
+ "div\t$rd, $rs, $imm">,
+ ISA_MIPS1_NOT_32R6_64R6;
def UDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
(ins GPR32Opnd:$rs, GPR32Opnd:$rt),
"divu\t$rd, $rs, $rt">,
ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<"div $rt, $rs", (SDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
- GPR32Opnd:$rs), 0>,
+def UDivIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32:$imm),
+ "divu\t$rd, $rs, $imm">,
+ ISA_MIPS1_NOT_32R6_64R6;
+
+
+def : MipsInstAlias<"div $rs, $rt", (SDIV GPR32ZeroOpnd:$rs,
+ GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"div $rs, $rt", (SDivMacro GPR32NonZeroOpnd:$rs,
+ GPR32NonZeroOpnd:$rs,
+ GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"div $rd, $imm", (SDivIMacro GPR32Opnd:$rd, GPR32Opnd:$rd,
+ simm32:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+
+def : MipsInstAlias<"divu $rt, $rs", (UDIV GPR32ZeroOpnd:$rt,
+ GPR32Opnd:$rs), 0>,
ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<"divu $rt, $rs", (UDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
+def : MipsInstAlias<"divu $rt, $rs", (UDivMacro GPR32NonZeroOpnd:$rt,
+ GPR32NonZeroOpnd:$rt,
GPR32Opnd:$rs), 0>,
ISA_MIPS1_NOT_32R6_64R6;
-def DSDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
- (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
- "ddiv\t$rd, $rs, $rt">,
- ISA_MIPS64_NOT_64R6;
-def DUDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
- (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
- "ddivu\t$rd, $rs, $rt">,
- ISA_MIPS64_NOT_64R6;
-def : MipsInstAlias<"ddiv $rt, $rs", (DSDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
- GPR32Opnd:$rs), 0>,
- ISA_MIPS64_NOT_64R6;
-def : MipsInstAlias<"ddivu $rt, $rs", (DUDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
- GPR32Opnd:$rs), 0>,
- ISA_MIPS64_NOT_64R6;
+
+def : MipsInstAlias<"divu $rd, $imm", (UDivIMacro GPR32Opnd:$rd, GPR32Opnd:$rd,
+ simm32:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
def Ulh : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
"ulh\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
@@ -2647,30 +2700,40 @@ def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
(TAILCALL texternalsym:$dst)>;
// hi/lo relocs
-def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
-def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
-def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
-def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
-def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-def : MipsPat<(MipsHi texternalsym:$in), (LUi texternalsym:$in)>;
-
-def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
-def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
-def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
-def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
-def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-def : MipsPat<(MipsLo texternalsym:$in), (ADDiu ZERO, texternalsym:$in)>;
-
-def : MipsPat<(add GPR32:$hi, (MipsLo tglobaladdr:$lo)),
- (ADDiu GPR32:$hi, tglobaladdr:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tblockaddress:$lo)),
- (ADDiu GPR32:$hi, tblockaddress:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tjumptable:$lo)),
- (ADDiu GPR32:$hi, tjumptable:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tconstpool:$lo)),
- (ADDiu GPR32:$hi, tconstpool:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (ADDiu GPR32:$hi, tglobaltlsaddr:$lo)>;
+multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu,
+ Register ZeroReg, RegisterOperand GPROpnd> {
+ def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>;
+ def : MipsPat<(MipsHi tblockaddress:$in), (Lui tblockaddress:$in)>;
+ def : MipsPat<(MipsHi tjumptable:$in), (Lui tjumptable:$in)>;
+ def : MipsPat<(MipsHi tconstpool:$in), (Lui tconstpool:$in)>;
+ def : MipsPat<(MipsHi tglobaltlsaddr:$in), (Lui tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsHi texternalsym:$in), (Lui texternalsym:$in)>;
+
+ def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>;
+ def : MipsPat<(MipsLo tblockaddress:$in),
+ (Addiu ZeroReg, tblockaddress:$in)>;
+ def : MipsPat<(MipsLo tjumptable:$in), (Addiu ZeroReg, tjumptable:$in)>;
+ def : MipsPat<(MipsLo tconstpool:$in), (Addiu ZeroReg, tconstpool:$in)>;
+ def : MipsPat<(MipsLo tglobaltlsaddr:$in),
+ (Addiu ZeroReg, tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsLo texternalsym:$in), (Addiu ZeroReg, texternalsym:$in)>;
+
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)),
+ (Addiu GPROpnd:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tblockaddress:$lo)),
+ (Addiu GPROpnd:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tjumptable:$lo)),
+ (Addiu GPROpnd:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tconstpool:$lo)),
+ (Addiu GPROpnd:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>;
+}
+
+defm : MipsHiLoRelocs<LUi, ADDiu, ZERO, GPR32Opnd>;
+
+def : MipsPat<(MipsGotHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : MipsPat<(MipsGotHi texternalsym:$in), (LUi texternalsym:$in)>;
// gp_rel relocs
def : MipsPat<(add GPR32:$gp, (MipsGPRel tglobaladdr:$in)),
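(Illustrative aside, not part of the patch.) The MipsHiLoRelocs patterns above rebuild a 32-bit address as lui %hi(sym) plus addiu %lo(sym). Since addiu sign-extends its 16-bit immediate, %hi is conventionally computed with a +0x8000 rounding so the pair still sums to the original address; a quick host-side check of that convention:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t sym = 0x1234abcdu;                        // low half has bit 15 set
  uint32_t hi = (sym + 0x8000u) >> 16;               // %hi(sym), with carry compensation
  int32_t  lo = static_cast<int16_t>(sym & 0xffffu); // %lo(sym), sign-extended like addiu
  uint32_t addr = (hi << 16) + static_cast<uint32_t>(lo);
  assert(addr == sym);                               // lui %hi + addiu %lo recovers sym
  return 0;
}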
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 1087d0e0140e..100503700a72 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -13,20 +13,31 @@
// FIXME: Fix pc-region jump instructions which cross 256MB segment boundaries.
//===----------------------------------------------------------------------===//
-#include "Mips.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCNaCl.h"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -47,21 +58,23 @@ static cl::opt<bool> ForceLongBranch(
cl::Hidden);
namespace {
+
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
struct MBBInfo {
- uint64_t Size, Address;
- bool HasLongBranch;
- MachineInstr *Br;
+ uint64_t Size = 0;
+ uint64_t Address;
+ bool HasLongBranch = false;
+ MachineInstr *Br = nullptr;
- MBBInfo() : Size(0), HasLongBranch(false), Br(nullptr) {}
+ MBBInfo() = default;
};
class MipsLongBranch : public MachineFunctionPass {
-
public:
static char ID;
+
MipsLongBranch(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm), IsPIC(TM.isPositionIndependent()),
ABI(static_cast<const MipsTargetMachine &>(TM).getABI()) {}
@@ -92,13 +105,8 @@ namespace {
};
char MipsLongBranch::ID = 0;
-} // end of anonymous namespace
-/// createMipsLongBranchPass - Returns a pass that converts branches to long
-/// branches.
-FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
- return new MipsLongBranch(tm);
-}
+} // end anonymous namespace
/// Iterate over list of Br's operands and search for a MachineBasicBlock
/// operand.
@@ -530,3 +538,9 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
return true;
}
+
+/// createMipsLongBranchPass - Returns a pass that converts branches to long
+/// branches.
+FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
+ return new MipsLongBranch(tm);
+}
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index d0609b15341d..5bf4c958c7b9 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -7,16 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "MipsInstrInfo.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -24,7 +23,7 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
-MipsFunctionInfo::~MipsFunctionInfo() {}
+MipsFunctionInfo::~MipsFunctionInfo() = default;
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
@@ -101,4 +100,4 @@ int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
return MoveF64ViaSpillFI;
}
-void MipsFunctionInfo::anchor() { }
+void MipsFunctionInfo::anchor() {}
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index c9e5fddc1932..553a66703b26 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -1,4 +1,4 @@
-//===-- MipsMachineFunctionInfo.h - Private data used for Mips ----*- C++ -*-=//
+//===- MipsMachineFunctionInfo.h - Private data used for Mips ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,12 +15,8 @@
#define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
#include "Mips16HardFloatInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include <map>
namespace llvm {
@@ -29,12 +25,9 @@ namespace llvm {
/// Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
public:
- MipsFunctionInfo(MachineFunction &MF)
- : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0),
- CallsEhReturn(false), IsISR(false), SaveS2(false),
- MoveF64ViaSpillFI(-1) {}
+ MipsFunctionInfo(MachineFunction &MF) : MF(MF) {}
- ~MipsFunctionInfo();
+ ~MipsFunctionInfo() override;
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -81,25 +74,26 @@ public:
int getMoveF64ViaSpillFI(const TargetRegisterClass *RC);
- std::map<const char *, const llvm::Mips16HardFloatInfo::FuncSignature *>
+ std::map<const char *, const Mips16HardFloatInfo::FuncSignature *>
StubsNeeded;
private:
virtual void anchor();
MachineFunction& MF;
+
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
+ unsigned SRetReturnReg = 0;
/// GlobalBaseReg - keeps track of the virtual register initialized for
/// use as the global base register. This is used for PIC in some PIC
/// relocation models.
- unsigned GlobalBaseReg;
+ unsigned GlobalBaseReg = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// True if function has a byval argument.
bool HasByvalArg;
@@ -108,25 +102,25 @@ private:
unsigned IncomingArgSize;
/// CallsEhReturn - Whether the function calls llvm.eh.return.
- bool CallsEhReturn;
+ bool CallsEhReturn = false;
/// Frame objects for spilling eh data registers.
int EhDataRegFI[4];
/// ISR - Whether the function is an Interrupt Service Routine.
- bool IsISR;
+ bool IsISR = false;
/// Frame objects for spilling C0_STATUS, C0_EPC
int ISRDataRegFI[2];
// saveS2
- bool SaveS2;
+ bool SaveS2 = false;
/// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the
/// O32 FPXX ABI is enabled. -1 is used to denote invalid index.
- int MoveF64ViaSpillFI;
+ int MoveF64ViaSpillFI = -1;
};
-} // end of namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index 23f0b7070d62..4708784063d3 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -1,4 +1,4 @@
-//===-- MipsOptionRecord.h - Abstraction for storing information ----------===//
+//===- MipsOptionRecord.h - Abstraction for storing information -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,14 +23,16 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include <cstdint>
namespace llvm {
+
class MipsELFStreamer;
-class MCSubtargetInfo;
class MipsOptionRecord {
public:
- virtual ~MipsOptionRecord(){};
+ virtual ~MipsOptionRecord() = default;
+
virtual void EmitMipsOptionRecord() = 0;
};
@@ -53,7 +55,8 @@ public:
COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
}
- ~MipsRegInfoRecord() override {}
+
+ ~MipsRegInfoRecord() override = default;
void EmitMipsOptionRecord() override;
void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
@@ -74,5 +77,7 @@ private:
uint32_t ri_cprmask[4];
int64_t ri_gp_value;
};
-} // namespace llvm
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MIPSOPTIONRECORD_H
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 51ac5620f585..670b6c96e78e 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -57,7 +57,7 @@ static bool needsFPFromSig(Function &F) {
;
}
if (F.arg_size() >=1) {
- Argument &Arg = F.getArgumentList().front();
+ Argument &Arg = *F.arg_begin();
switch (Arg.getType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 8c82239ebbd3..ccfdcc89b078 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -290,6 +290,25 @@ class GPR32Class<list<ValueType> regTypes> :
K0, K1, GP, SP, FP, RA)>;
def GPR32 : GPR32Class<[i32]>;
+
+def GPR32ZERO : RegisterClass<"Mips", [i32], 32, (add
+ // Reserved
+ ZERO)>;
+
+def GPR32NONZERO : RegisterClass<"Mips", [i32], 32, (add
+ // Reserved
+ AT,
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Not preserved across procedure calls
+ T8, T9,
+ // Reserved
+ K0, K1, GP, SP, FP, RA)>;
+
def DSPR : GPR32Class<[v4i8, v2i16]>;
def GPRMM16 : RegisterClass<"Mips", [i32], 32, (add
@@ -317,7 +336,7 @@ def GPRMM16MoveP : RegisterClass<"Mips", [i32], 32, (add
S0, S2, S3, S4)>;
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
-// Reserved
+ // Reserved
ZERO_64, AT_64,
// Return Values and Arguments
V0_64, V1_64, A0_64, A1_64, A2_64, A3_64,
@@ -479,6 +498,16 @@ def GPR64AsmOperand : MipsAsmRegOperand {
let PredicateMethod = "isGPRAsmReg";
}
+def GPR32ZeroAsmOperand : MipsAsmRegOperand {
+ let Name = "GPR32ZeroAsmReg";
+ let PredicateMethod = "isGPRZeroAsmReg";
+}
+
+def GPR32NonZeroAsmOperand : MipsAsmRegOperand {
+ let Name = "GPR32NonZeroAsmReg";
+ let PredicateMethod = "isGPRNonZeroAsmReg";
+}
+
def GPR32AsmOperand : MipsAsmRegOperand {
let Name = "GPR32AsmReg";
let PredicateMethod = "isGPRAsmReg";
@@ -550,6 +579,14 @@ def MSACtrlAsmOperand : MipsAsmRegOperand {
let Name = "MSACtrlAsmReg";
}
+def GPR32ZeroOpnd : RegisterOperand<GPR32ZERO> {
+ let ParserMatchClass = GPR32ZeroAsmOperand;
+}
+
+def GPR32NonZeroOpnd : RegisterOperand<GPR32NONZERO> {
+ let ParserMatchClass = GPR32NonZeroAsmOperand;
+}
+
def GPR32Opnd : RegisterOperand<GPR32> {
let ParserMatchClass = GPR32AsmOperand;
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 4996d070eb29..ef8d18c6deb1 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -11,27 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsSEFrameLowering.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSEFrameLowering.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
using namespace llvm;
-namespace {
-typedef MachineBasicBlock::iterator Iter;
-
static std::pair<unsigned, unsigned> getMFHiLoOpc(unsigned Src) {
if (Mips::ACC64RegClass.contains(Src))
return std::make_pair((unsigned)Mips::PseudoMFHI,
@@ -47,6 +62,8 @@ static std::pair<unsigned, unsigned> getMFHiLoOpc(unsigned Src) {
return std::make_pair(0, 0);
}
+namespace {
+
/// Helper class to expand pseudos.
class ExpandPseudo {
public:
@@ -54,6 +71,8 @@ public:
bool expand();
private:
+ typedef MachineBasicBlock::iterator Iter;
+
bool expandInstr(MachineBasicBlock &MBB, Iter I);
void expandLoadCCond(MachineBasicBlock &MBB, Iter I);
void expandStoreCCond(MachineBasicBlock &MBB, Iter I);
@@ -74,7 +93,8 @@ private:
const MipsSEInstrInfo &TII;
const MipsRegisterInfo &RegInfo;
};
-}
+
+} // end anonymous namespace
ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
: MF(MF_), MRI(MF.getRegInfo()),
@@ -419,7 +439,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.size()) {
+ if (!CSI.empty()) {
// Find the instruction past the last instruction that saves a callee-saved
// register to the stack.
for (unsigned i = 0; i < CSI.size(); ++i)
@@ -471,7 +491,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
} else {
// Reg is either in GPR32 or FGR32.
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, 1), Offset));
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -534,7 +554,6 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
void MipsSEFrameLowering::emitInterruptPrologueStub(
MachineFunction &MF, MachineBasicBlock &MBB) const {
-
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -722,7 +741,6 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
void MipsSEFrameLowering::emitInterruptEpilogueStub(
MachineFunction &MF, MachineBasicBlock &MBB) const {
-
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -820,7 +838,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
-
// Reserve call frame if the size of the maximum call frame fits into 16-bit
// immediate field and there are no variable sized objects on the stack.
// Make sure the second register scavenger spill slot can be accessed with one
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 63cd3cebc56a..bf30deb1905e 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===//
+//===- MipsSEFrameLowering.h - Mips32/64 frame lowering ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,8 @@
#define LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
#include "MipsFrameLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
namespace llvm {
@@ -47,6 +49,7 @@ private:
void emitInterruptPrologueStub(MachineFunction &MF,
MachineBasicBlock &MBB) const;
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 92d3c001df94..c9cf9363b8c9 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -97,11 +97,13 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
// Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
if ((MI.getOpcode() == Mips::ADDiu) &&
(MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).isImm()) &&
(MI.getOperand(2).getImm() == 0)) {
DstReg = MI.getOperand(0).getReg();
ZeroReg = Mips::ZERO;
} else if ((MI.getOpcode() == Mips::DADDiu) &&
(MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).isImm()) &&
(MI.getOperand(2).getImm() == 0)) {
DstReg = MI.getOperand(0).getReg();
ZeroReg = Mips::ZERO_64;
@@ -690,7 +692,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
// as the original value.
if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
EltTy);
return true;
}
@@ -722,7 +724,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
// Extract the run of set bits starting with bit zero, and test that the
// result is the same as the original value
if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
EltTy);
return true;
  }
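A minimal standalone sketch of the trailing-ones mask check used by selectVSplatMaskR above, together with the popcount-minus-one immediate it now emits. This is illustration only, not part of the patch; it assumes 32-bit element values and a GCC/Clang-style __builtin_popcount.

#include <cassert>
#include <cstdint>

int main() {
  // 0x000000FF is a run of eight set bits starting at bit zero, so extracting
  // that run reproduces the value and the emitted immediate is popcount - 1 = 7.
  uint32_t ImmValue = 0x000000FF;
  assert(ImmValue == (ImmValue & ~(ImmValue + 1)));
  assert(__builtin_popcount(ImmValue) - 1 == 7);

  // 0x000000F0 is also a contiguous run, but it does not start at bit zero,
  // so the check rejects it.
  uint32_t NotAMask = 0x000000F0;
  assert(NotAMask != (NotAMask & ~(NotAMask + 1)));
  return 0;
}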
@@ -932,6 +934,9 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
  // same set of registers. Similarly, ldi.h isn't capable of producing {
// 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can.
+ const MipsABIInfo &ABI =
+ static_cast<const MipsTargetMachine &>(TM).getABI();
+
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
@@ -969,13 +974,233 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
break;
}
- if (!SplatValue.isSignedIntN(10))
- return false;
-
- SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
- ViaVecTy.getVectorElementType());
+ SDNode *Res;
- SDNode *Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+ // If we have a signed 10 bit integer, we can splat it directly.
+ //
+ // If we have something bigger we can synthesize the value into a GPR and
+ // splat from there.
+ if (SplatValue.isSignedIntN(10)) {
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+ ViaVecTy.getVectorElementType());
+
+ Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+ } else if (SplatValue.isSignedIntN(16) &&
+ ((ABI.IsO32() && SplatBitSize < 64) ||
+ (ABI.IsN32() || ABI.IsN64()))) {
+ // Only handle signed 16 bit values when the element size is GPR width.
+ // MIPS64 can handle all the cases but MIPS32 would need to handle
+ // negative cases specifically here. Instead, handle those cases as
+ // 64bit values.
+
+ bool Is32BitSplat = ABI.IsO32() || SplatBitSize < 64;
+ const unsigned ADDiuOp = Is32BitSplat ? Mips::ADDiu : Mips::DADDiu;
+ const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64;
+ SDValue ZeroVal = CurDAG->getRegister(
+ Is32BitSplat ? Mips::ZERO : Mips::ZERO_64, SplatMVT);
+
+ const unsigned FILLOp =
+ SplatBitSize == 16
+ ? Mips::FILL_H
+ : (SplatBitSize == 32 ? Mips::FILL_W
+ : (SplatBitSize == 64 ? Mips::FILL_D : 0));
+
+ assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!");
+ assert((!ABI.IsO32() || (FILLOp != Mips::FILL_D)) &&
+ "Attempting to use fill.d on MIPS32!");
+
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT);
+
+ Res = CurDAG->getMachineNode(ADDiuOp, DL, SplatMVT, ZeroVal, LoVal);
+ Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) {
+ // Only handle the cases where the splat size agrees with the size
+ // of the SplatValue here.
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!");
+ Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 64 &&
+ (ABI.IsN32() || ABI.IsN64())) {
+ // N32 and N64 can perform some tricks that O32 can't for signed 32 bit
+ // integers due to having 64bit registers. lui will cause the necessary
+ // zero/sign extension.
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ Res = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64),
+ SDValue(Res, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ Res =
+ CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(64)) {
+ // If we have a 64 bit Splat value, we perform a similar sequence to the
+ // above:
+ //
+ // MIPS32: MIPS64:
+ // lui $res, %highest(val) lui $res, %highest(val)
+ // ori $res, $res, %higher(val) ori $res, $res, %higher(val)
+ // lui $res2, %hi(val) lui $res2, %hi(val)
+ // ori $res2, %res2, %lo(val) ori $res2, %res2, %lo(val)
+ // $res3 = fill $res2 dinsu $res, $res2, 0, 32
+ // $res4 = insert.w $res3[1], $res fill.d $res
+ // splat.d $res4, 0
+ //
+ // The ability to use dinsu is guaranteed as MSA requires MIPSR5. This saves
+ // having to materialize the value by shifts and ors.
+ //
+ // FIXME: Implement the preferred sequence for MIPS64R6:
+ //
+ // MIPS64R6:
+ // ori $res, $zero, %lo(val)
+ // daui $res, $res, %hi(val)
+ // dahi $res, $res, %higher(val)
+  //      dati $res, $res, %highest(val)
+ // fill.d $res
+ //
+
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ const unsigned Higher = SplatValue.lshr(32).getLoBits(16).getZExtValue();
+ const unsigned Highest = SplatValue.lshr(48).getLoBits(16).getZExtValue();
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+ SDValue HigherVal = CurDAG->getTargetConstant(Higher, DL, MVT::i32);
+ SDValue HighestVal = CurDAG->getTargetConstant(Highest, DL, MVT::i32);
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ // Independent of whether we're targeting MIPS64 or not, the basic
+ // operations are the same. Also, directly use the $zero register if
+ // the 16 bit chunk is zero.
+ //
+ // For optimization purposes we always synthesize the splat value as
+  //      an i32 value, then if we're targeting MIPS64, use SUBREG_TO_REG
+ // just before combining the values with dinsu to produce an i64. This
+ // enables SelectionDAG to aggressively share components of splat values
+ // where possible.
+ //
+ // FIXME: This is the general constant synthesis problem. This code
+ // should be factored out into a class shared between all the
+ // classes that need it. Specifically, for a splat size of 64
+ // bits that's a negative number we can do better than LUi/ORi
+ // for the upper 32bits.
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ SDNode *HiRes;
+ if (Highest)
+ HiRes = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HighestVal);
+
+ if (Higher)
+ HiRes = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Highest ? SDValue(HiRes, 0) : ZeroVal,
+ HigherVal);
+
+
+ if (ABI.IsO32()) {
+ Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32,
+ (Hi || Lo) ? SDValue(Res, 0) : ZeroVal);
+
+ Res = CurDAG->getMachineNode(
+ Mips::INSERT_W, DL, MVT::v4i32, SDValue(Res, 0),
+ (Highest || Higher) ? SDValue(HiRes, 0) : ZeroVal,
+ CurDAG->getTargetConstant(1, DL, MVT::i32));
+
+ const TargetLowering *TLI = getTargetLowering();
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(ViaVecTy.getSimpleVT());
+
+ Res = CurDAG->getMachineNode(
+ Mips::COPY_TO_REGCLASS, DL, ViaVecTy, SDValue(Res, 0),
+ CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32));
+
+ Res = CurDAG->getMachineNode(
+ Mips::SPLATI_D, DL, MVT::v2i64, SDValue(Res, 0),
+ CurDAG->getTargetConstant(0, DL, MVT::i32));
+ } else if (ABI.IsN64() || ABI.IsN32()) {
+
+ SDValue Zero64Val = CurDAG->getRegister(Mips::ZERO_64, MVT::i64);
+ const bool HiResNonZero = Highest || Higher;
+ const bool ResNonZero = Hi || Lo;
+
+ if (HiResNonZero)
+ HiRes = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Highest >> 15) & 0x1), DL, MVT::i64),
+ SDValue(HiRes, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ if (ResNonZero)
+ Res = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64),
+ SDValue(Res, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ // We have 3 cases:
+ // The HiRes is nonzero but Res is $zero => dsll32 HiRes, 0
+ // The Res is nonzero but HiRes is $zero => dinsu Res, $zero, 32, 32
+ // Both are non zero => dinsu Res, HiRes, 32, 32
+ //
+ // The obvious "missing" case is when both are zero, but that case is
+ // handled by the ldi case.
+ if (ResNonZero) {
+ SDValue Ops[4] = {HiResNonZero ? SDValue(HiRes, 0) : Zero64Val,
+ CurDAG->getTargetConstant(64, DL, MVT::i32),
+ CurDAG->getTargetConstant(32, DL, MVT::i32),
+ SDValue(Res, 0)};
+
+ Res = CurDAG->getMachineNode(Mips::DINSU, DL, MVT::i64, Ops);
+ } else if (HiResNonZero) {
+ Res = CurDAG->getMachineNode(
+ Mips::DSLL32, DL, MVT::i64, SDValue(HiRes, 0),
+ CurDAG->getTargetConstant(0, DL, MVT::i32));
+ } else
+ llvm_unreachable(
+ "Zero splat value handled by non-zero 64bit splat synthesis!");
+
+ Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0));
+ } else
+ llvm_unreachable("Unknown ABI in MipsISelDAGToDAG!");
+
+ } else
+ return false;
if (ResVecTy != ViaVecTy) {
// If LdiOp is writing to a different register class to ResVecTy, then
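As a reference for the splat-synthesis paths added above, here is a minimal standalone sketch (illustration only, not part of the patch) of how a 64-bit splat value is decomposed into the Lo/Hi/Higher/Highest 16-bit chunks. It assumes LLVM's APInt header is available and uses an arbitrary example value.

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt SplatValue(64, 0x0123456789ABCDEFULL);
  // Same chunking as the selector: low 16 bits, then the next three 16-bit
  // slices obtained by logical right shifts of 16, 32 and 48 bits.
  unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
  unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
  unsigned Higher = SplatValue.lshr(32).getLoBits(16).getZExtValue();
  unsigned Highest = SplatValue.lshr(48).getLoBits(16).getZExtValue();
  assert(Lo == 0xCDEF && Hi == 0x89AB && Higher == 0x4567 && Highest == 0x0123);
  return 0;
}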
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index f28e8b36fdbc..e2da8477295b 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1123,7 +1123,8 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
case ISD::MUL:
return performMULCombine(N, DAG, DCI, this);
case ISD::SHL:
- return performSHLCombine(N, DAG, DCI, Subtarget);
+ Val = performSHLCombine(N, DAG, DCI, Subtarget);
+ break;
case ISD::SRA:
return performSRACombine(N, DAG, DCI, Subtarget);
case ISD::SRL:
@@ -1643,7 +1644,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
report_fatal_error("Immediate out of range");
APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
- Op->getConstantOperandVal(3));
+ Op->getConstantOperandVal(3) + 1);
return DAG.getNode(ISD::VSELECT, DL, VecTy,
DAG.getConstant(Mask, DL, VecTy, true),
Op->getOperand(2), Op->getOperand(1));
@@ -1658,7 +1659,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
report_fatal_error("Immediate out of range");
APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
- Op->getConstantOperandVal(3));
+ Op->getConstantOperandVal(3) + 1);
return DAG.getNode(ISD::VSELECT, DL, VecTy,
DAG.getConstant(Mask, DL, VecTy, true),
Op->getOperand(2), Op->getOperand(1));
@@ -2529,11 +2530,10 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
SplatBitSize != 64)
return SDValue();
- // If the value fits into a simm10 then we can use ldi.[bhwd]
- // However, if it isn't an integer type we will have to bitcast from an
- // integer type first. Also, if there are any undefs, we must lower them
- // to defined values first.
- if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10))
+ // If the value isn't an integer type we will have to bitcast
+ // from an integer type first. Also, if there are any undefs, we must
+ // lower them to defined values first.
+ if (ResTy.isInteger() && !HasAnyUndefs)
return Op;
EVT ViaVecTy;
@@ -3628,7 +3628,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
for (unsigned i = 1; i < MI.getNumOperands(); i++)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index ea703d0edd96..91e712a7a54e 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -540,11 +540,20 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+
+ MachineInstrBuilder MIB;
if (Subtarget.isGP64bit())
- BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
- .addReg(Mips::RA_64);
+ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
+ .addReg(Mips::RA_64, RegState::Undef);
else
- BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA);
+ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn))
+ .addReg(Mips::RA, RegState::Undef);
+
+ // Retain any imp-use flags.
+  for (auto &MO : I->operands()) {
+ if (MO.isImplicit())
+ MIB.add(MO);
+ }
}
void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 3e7570ff46ed..8f5ecadecdea 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -70,8 +70,8 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
- Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasEVA(false), TM(TM),
- TargetTriple(TT), TSInfo(),
+ Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
+ HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(),
InstrInfo(
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
@@ -117,6 +117,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
if (NoABICalls && TM.isPositionIndependent())
report_fatal_error("position-independent code requires '-mabicalls'");
+ if (isABI_N64() && !TM.isPositionIndependent() && !hasSym32())
+ NoABICalls = true;
+
// Set UseSmallSection.
UseSmallSection = GPOpt;
if (!NoABICalls && GPOpt) {
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 38d3cee70477..cca2cb8a4660 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -142,6 +142,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// UseTCCInDIV -- Enables the use of trapping in the assembler.
bool UseTCCInDIV;
+ // Sym32 -- On Mips64 symbols are 32 bits.
+ bool HasSym32;
+
// HasEVA -- supports EVA ASE.
bool HasEVA;
@@ -229,7 +232,11 @@ public:
unsigned getGPRSizeInBytes() const { return isGP64bit() ? 8 : 4; }
bool isPTR64bit() const { return IsPTR64bit; }
bool isPTR32bit() const { return !IsPTR64bit; }
+ bool hasSym32() const {
+ return (HasSym32 && isABI_N64()) || isABI_N32() || isABI_O32();
+ }
bool isSingleFloat() const { return IsSingleFloat; }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
bool inMips16ModeDefault() const {
@@ -271,6 +278,8 @@ public:
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
+ bool isXRaySupported() const override { return true; }
+
// for now constant islands are on for the whole compilation unit but we only
// really use them if in addition we are in mips16 mode
static bool useConstantIslands();
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index bb48188e3b87..a45a9c4b41c3 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,27 +11,30 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsABIInfo.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "Mips.h"
-#include "Mips16FrameLowering.h"
#include "Mips16ISelDAGToDAG.h"
-#include "Mips16ISelLowering.h"
-#include "Mips16InstrInfo.h"
-#include "MipsFrameLowering.h"
-#include "MipsInstrInfo.h"
-#include "MipsSEFrameLowering.h"
#include "MipsSEISelDAGToDAG.h"
-#include "MipsSEISelLowering.h"
-#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
#include "MipsTargetObjectFile.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetOptions.h"
+#include <string>
using namespace llvm;
@@ -48,7 +51,7 @@ extern "C" void LLVMInitializeMipsTarget() {
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options,
bool isLittle) {
- std::string Ret = "";
+ std::string Ret;
MipsABIInfo ABI = MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions);
// There are both little and big endian mips.
@@ -102,7 +105,7 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(CM, RM), CM,
OL),
- isLittle(isLittle), TLOF(make_unique<MipsTargetObjectFile>()),
+ isLittle(isLittle), TLOF(llvm::make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)),
Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
@@ -113,9 +116,9 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-MipsTargetMachine::~MipsTargetMachine() {}
+MipsTargetMachine::~MipsTargetMachine() = default;
-void MipsebTargetMachine::anchor() { }
+void MipsebTargetMachine::anchor() {}
MipsebTargetMachine::MipsebTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -125,7 +128,7 @@ MipsebTargetMachine::MipsebTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-void MipselTargetMachine::anchor() { }
+void MipselTargetMachine::anchor() {}
MipselTargetMachine::MipselTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -182,10 +185,10 @@ void MipsTargetMachine::resetSubtarget(MachineFunction *MF) {
Subtarget = const_cast<MipsSubtarget *>(getSubtargetImpl(*MF->getFunction()));
MF->setSubtarget(Subtarget);
- return;
}
namespace {
+
/// Mips Code Generator Pass Configuration Options.
class MipsPassConfig : public TargetPassConfig {
public:
@@ -209,11 +212,10 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
-
void addPreRegAlloc() override;
-
};
-} // namespace
+
+} // end anonymous namespace
TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
return new MipsPassConfig(this, PM);
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index e4cf17e2abd8..140d7133f879 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===//
+//===- MipsTargetMachine.h - Define TargetMachine for Mips ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,14 @@
#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsSubtarget.h"
-#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
-class formatted_raw_ostream;
-class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
bool isLittle;
@@ -73,6 +72,7 @@ public:
///
class MipsebTargetMachine : public MipsTargetMachine {
virtual void anchor();
+
public:
MipsebTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -84,6 +84,7 @@ public:
///
class MipselTargetMachine : public MipsTargetMachine {
virtual void anchor();
+
public:
MipselTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -91,6 +92,6 @@ public:
CodeGenOpt::Level OL);
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 399ff1fd96e0..a8eecfcc138c 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -17,7 +17,6 @@ set(NVPTXCodeGen_sources
NVPTXISelDAGToDAG.cpp
NVPTXISelLowering.cpp
NVPTXImageOptimizer.cpp
- NVPTXInferAddressSpaces.cpp
NVPTXInstrInfo.cpp
NVPTXLowerAggrCopies.cpp
NVPTXLowerArgs.cpp
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 4594c22b8701..b774fe169d71 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -61,6 +61,12 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
case 6:
OS << "%fd";
break;
+ case 7:
+ OS << "%h";
+ break;
+ case 8:
+ OS << "%hh";
+ break;
}
unsigned VReg = RegNo & 0x0FFFFFFF;
@@ -247,8 +253,12 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
O << "s";
else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
O << "u";
- else
+ else if (Imm == NVPTX::PTXLdStInstCode::Untyped)
+ O << "b";
+ else if (Imm == NVPTX::PTXLdStInstCode::Float)
O << "f";
+ else
+ llvm_unreachable("Unknown register type");
} else if (!strcmp(Modifier, "vec")) {
if (Imm == NVPTX::PTXLdStInstCode::V2)
O << ".v2";
diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt
index 70a2de3441ce..ee8aaa998bb6 100644
--- a/lib/Target/NVPTX/LLVMBuild.txt
+++ b/lib/Target/NVPTX/LLVMBuild.txt
@@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library
name = NVPTXCodeGen
parent = NVPTX
-required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils Vectorize
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils Vectorize
add_to_library_groups = NVPTX
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index c455a437d8d5..902d1b25e7dd 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -45,10 +45,8 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
-FunctionPass *createNVPTXInferAddressSpacesPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass();
-FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
@@ -108,7 +106,8 @@ enum AddressSpace {
enum FromType {
Unsigned = 0,
Signed,
- Float
+ Float,
+ Untyped
};
enum VecType {
Scalar = 1,
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3c2594c77f45..21e25de80dc7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -320,6 +320,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
switch (Cnt->getType()->getTypeID()) {
default: report_fatal_error("Unsupported FP type"); break;
+ case Type::HalfTyID:
+ MCOp = MCOperand::createExpr(
+ NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
+ break;
case Type::FloatTyID:
MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
@@ -357,6 +361,10 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
Ret = (5 << 28);
} else if (RC == &NVPTX::Float64RegsRegClass) {
Ret = (6 << 28);
+ } else if (RC == &NVPTX::Float16RegsRegClass) {
+ Ret = (7 << 28);
+ } else if (RC == &NVPTX::Float16x2RegsRegClass) {
+ Ret = (8 << 28);
} else {
report_fatal_error("Bad register class");
}
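A small standalone sketch (illustration only, not part of the patch) of the virtual-register encoding used above: the register class goes in the top four bits and the virtual register number in the low 28 bits, assuming the printer recovers the class from the high bits as the 0x0FFFFFFF mask in printRegName suggests.

#include <cassert>

int main() {
  // Pack class 7 (the new Float16Regs case) with virtual register 5, mirroring
  // encodeVirtualRegister's (7 << 28) form, then unpack both fields again.
  unsigned Reg = (7u << 28) | 5u;
  unsigned RCId = Reg >> 28;
  unsigned VReg = Reg & 0x0FFFFFFF;
  assert(RCId == 7 && VReg == 5);
  return 0;
}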
@@ -396,12 +404,15 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
unsigned size = 0;
if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
- if (size < 32)
- size = 32;
} else {
assert(Ty->isFloatingPointTy() && "Floating point type expected here");
size = Ty->getPrimitiveSizeInBits();
}
+ // PTX ABI requires all scalar return values to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type in
+ // PTX, so its size must be adjusted here, too.
+ if (size < 32)
+ size = 32;
O << ".param .b" << size << " func_retval0";
} else if (isa<PointerType>(Ty)) {
@@ -1221,7 +1232,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
+ if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
+ (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
O << " .";
// Special case: ABI requires that we use .u8 for predicates
if (ETy->isIntegerTy(1))
@@ -1262,6 +1274,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// targets that support these high level field accesses. Structs, arrays
// and vectors are lowered into arrays of bytes.
switch (ETy->getTypeID()) {
+ case Type::IntegerTyID: // Integers larger than 64 bits
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
@@ -1376,6 +1389,9 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
}
break;
}
+ case Type::HalfTyID:
+ // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
+ return "b16";
case Type::FloatTyID:
return "f32";
case Type::DoubleTyID:
@@ -1477,7 +1493,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
- const AttributeSet &PAL = F->getAttributes();
+ const AttributeList &PAL = F->getAttributes();
const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
@@ -1534,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
+ if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1601,6 +1617,11 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
sz = 32;
} else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
+ else if (Ty->isHalfTy())
+ // PTX ABI requires all scalar parameters to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type
+ // in PTX, so its size must be adjusted here, too.
+ sz = 32;
else
sz = Ty->getPrimitiveSizeInBits();
if (isABI)
@@ -1977,6 +1998,17 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
const DataLayout &DL = getDataLayout();
int Bytes;
+ // Integers of arbitrary width
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ APInt Val = CI->getValue();
+ for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
+ uint8_t Byte = Val.getLoBits(8).getZExtValue();
+ aggBuffer->addBytes(&Byte, 1, 1);
+ Val = Val.lshr(8);
+ }
+ return;
+ }
+
// Old constants
if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
if (CPV->getNumOperands())
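The arbitrary-width integer path added to bufferAggregateConstant above emits bytes least-significant first. A minimal standalone sketch (illustration only, not part of the patch), assuming LLVM's APInt is available, shows that ordering for a 128-bit value.

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  llvm::APInt Val(128, "0102030405060708090a0b0c0d0e0f10", 16);
  std::vector<uint8_t> Bytes;
  // Same loop shape as the patch: peel off the low byte, then shift right.
  for (unsigned I = 0; I < 16; ++I) {
    Bytes.push_back(Val.getLoBits(8).getZExtValue());
    Val = Val.lshr(8);
  }
  assert(Bytes.front() == 0x10 && Bytes.back() == 0x01); // little-endian order
  return 0;
}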
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 43c478f4212f..274977254046 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -26,23 +26,6 @@ using namespace llvm;
#define DEBUG_TYPE "nvptx-isel"
-static cl::opt<int> UsePrecDivF32(
- "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if available."),
- cl::init(2));
-
-static cl::opt<bool>
-UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
- cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
- cl::init(true));
-
-static cl::opt<bool>
-FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
- cl::init(false));
-
-
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -57,45 +40,20 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
}
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
- return SelectionDAGISel::runOnMachineFunction(MF);
+ Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
}
int NVPTXDAGToDAGISel::getDivF32Level() const {
- if (UsePrecDivF32.getNumOccurrences() > 0) {
- // If nvptx-prec-div32=N is used on the command-line, always honor it
- return UsePrecDivF32;
- } else {
- // Otherwise, use div.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return 0;
- else
- return 2;
- }
+ return Subtarget->getTargetLowering()->getDivF32Level();
}
bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
- if (UsePrecSqrtF32.getNumOccurrences() > 0) {
- // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
- return UsePrecSqrtF32;
- } else {
- // Otherwise, use sqrt.approx if fast math is enabled
- return !TM.Options.UnsafeFPMath;
- }
+ return Subtarget->getTargetLowering()->usePrecSqrtF32();
}
bool NVPTXDAGToDAGISel::useF32FTZ() const {
- if (FtzEnabled.getNumOccurrences() > 0) {
- // If nvptx-f32ftz is used on the command-line, always honor it
- return FtzEnabled;
- } else {
- const Function *F = MF->getFunction();
- // Otherwise, check for an nvptx-f32ftz attribute on the function
- if (F->hasFnAttribute("nvptx-f32ftz"))
- return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
- else
- return false;
- }
+ return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}
bool NVPTXDAGToDAGISel::allowFMA() const {
@@ -103,6 +61,11 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
return TL->allowFMA(*MF, OptLevel);
}
+bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
+ const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
+ return TL->allowUnsafeFPMath(*MF);
+}
+
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {
@@ -121,6 +84,14 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryStore(N))
return;
break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (tryEXTRACT_VECTOR_ELEMENT(N))
+ return;
+ break;
+ case NVPTXISD::SETP_F16X2:
+ SelectSETP_F16X2(N);
+ return;
+
case NVPTXISD::LoadV2:
case NVPTXISD::LoadV4:
if (tryLoadVector(N))
@@ -515,6 +486,10 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::ADDRSPACECAST:
SelectAddrSpaceCast(N);
return;
+ case ISD::ConstantFP:
+ if (tryConstantFP16(N))
+ return;
+ break;
default:
break;
}
@@ -536,6 +511,140 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
}
}
+// There's no way to specify FP16 immediates in .f16 ops, so we have to
+// load them into an .f16 register first.
+bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
+ if (N->getValueType(0) != MVT::f16)
+ return false;
+ SDValue Val = CurDAG->getTargetConstantFP(
+ cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
+ SDNode *LoadConstF16 =
+ CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
+ ReplaceNode(N, LoadConstF16);
+ return true;
+}
+
+// Map an ISD::CONDCODE value to the appropriate CmpMode expected by
+// NVPTXInstPrinter::printCmpMode()
+static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
+ using NVPTX::PTXCmpMode::CmpMode;
+ unsigned PTXCmpMode = [](ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unexpected condition code.");
+ case ISD::SETOEQ:
+ return CmpMode::EQ;
+ case ISD::SETOGT:
+ return CmpMode::GT;
+ case ISD::SETOGE:
+ return CmpMode::GE;
+ case ISD::SETOLT:
+ return CmpMode::LT;
+ case ISD::SETOLE:
+ return CmpMode::LE;
+ case ISD::SETONE:
+ return CmpMode::NE;
+ case ISD::SETO:
+ return CmpMode::NUM;
+ case ISD::SETUO:
+ return CmpMode::NotANumber;
+ case ISD::SETUEQ:
+ return CmpMode::EQU;
+ case ISD::SETUGT:
+ return CmpMode::GTU;
+ case ISD::SETUGE:
+ return CmpMode::GEU;
+ case ISD::SETULT:
+ return CmpMode::LTU;
+ case ISD::SETULE:
+ return CmpMode::LEU;
+ case ISD::SETUNE:
+ return CmpMode::NEU;
+ case ISD::SETEQ:
+ return CmpMode::EQ;
+ case ISD::SETGT:
+ return CmpMode::GT;
+ case ISD::SETGE:
+ return CmpMode::GE;
+ case ISD::SETLT:
+ return CmpMode::LT;
+ case ISD::SETLE:
+ return CmpMode::LE;
+ case ISD::SETNE:
+ return CmpMode::NE;
+ }
+ }(CondCode.get());
+
+ if (FTZ)
+ PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
+
+ return PTXCmpMode;
+}
+
+bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
+ unsigned PTXCmpMode =
+ getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
+ SDLoc DL(N);
+ SDNode *SetP = CurDAG->getMachineNode(
+ NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
+ N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
+ ReplaceNode(N, SetP);
+ return true;
+}
+
+// Find all instances of extract_vector_elt that use this v2f16 vector
+// and coalesce them into a scattering move instruction.
+bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
+ SDValue Vector = N->getOperand(0);
+
+ // We only care about f16x2 as it's the only real vector type we
+ // need to deal with.
+ if (Vector.getSimpleValueType() != MVT::v2f16)
+ return false;
+
+ // Find and record all uses of this vector that extract element 0 or 1.
+ SmallVector<SDNode *, 4> E0, E1;
+ for (const auto &U : Vector.getNode()->uses()) {
+ if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ continue;
+ if (U->getOperand(0) != Vector)
+ continue;
+ if (const ConstantSDNode *IdxConst =
+ dyn_cast<ConstantSDNode>(U->getOperand(1))) {
+ if (IdxConst->getZExtValue() == 0)
+ E0.push_back(U);
+ else if (IdxConst->getZExtValue() == 1)
+ E1.push_back(U);
+ else
+ llvm_unreachable("Invalid vector index.");
+ }
+ }
+
+ // There's no point scattering f16x2 if we only ever access one
+ // element of it.
+ if (E0.empty() || E1.empty())
+ return false;
+
+ unsigned Op = NVPTX::SplitF16x2;
+  // If the vector has been BITCAST'ed from i32, we can use the original
+  // value directly and avoid a register-to-register move.
+ SDValue Source = Vector;
+ if (Vector->getOpcode() == ISD::BITCAST) {
+ Op = NVPTX::SplitI32toF16x2;
+ Source = Vector->getOperand(0);
+ }
+ // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
+ // into f16,f16 SplitF16x2(V)
+ SDNode *ScatterOp =
+ CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
+ for (auto *Node : E0)
+ ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
+ for (auto *Node : E1)
+ ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
+
+ return true;
+}
+
static unsigned int getCodeAddrSpace(MemSDNode *N) {
const Value *Src = N->getMemOperand()->getValue();
@@ -681,6 +790,35 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
}
}
+// Helper function to reduce the amount of boilerplate code needed for
+// opcode selection.
+static Optional<unsigned> pickOpcodeForVT(
+ MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
+ unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
+ unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
+ switch (VT) {
+ case MVT::i1:
+ case MVT::i8:
+ return Opcode_i8;
+ case MVT::i16:
+ return Opcode_i16;
+ case MVT::i32:
+ return Opcode_i32;
+ case MVT::i64:
+ return Opcode_i64;
+ case MVT::f16:
+ return Opcode_f16;
+ case MVT::v2f16:
+ return Opcode_f16x2;
+ case MVT::f32:
+ return Opcode_f32;
+ case MVT::f64:
+ return Opcode_f64;
+ default:
+ return None;
+ }
+}
+
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
@@ -709,33 +847,32 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
isVolatile = false;
- // Vector Setting
- MVT SimpleVT = LoadedVT.getSimpleVT();
- unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
- if (SimpleVT.isVector()) {
- unsigned num = SimpleVT.getVectorNumElements();
- if (num == 2)
- vecType = NVPTX::PTXLdStInstCode::V2;
- else if (num == 4)
- vecType = NVPTX::PTXLdStInstCode::V4;
- else
- return false;
- }
-
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
// Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT SimpleVT = LoadedVT.getSimpleVT();
MVT ScalarVT = SimpleVT.getScalarType();
// Read at least 8 bits (predicates are stored as 8-bit values)
unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
unsigned int fromType;
+
+ // Vector Setting
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
+ // v2f16 is loaded using ld.b32
+ fromTypeWidth = 32;
+ }
+
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
- fromType = NVPTX::PTXLdStInstCode::Float;
+ // f16 uses .b16 as its storage type.
+ fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
fromType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -744,169 +881,72 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue Addr;
SDValue Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N1, Addr)) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_avar;
- break;
- default:
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
+ NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
+ NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Addr, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_asi;
- break;
- default:
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
+ NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
+ NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
+ NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- if (TM.is64Bit()) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_ari_64;
- break;
- default:
- return false;
- }
- } else {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_ari;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
+ NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
+ NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
+ else
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
+ NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
+ NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
} else {
- if (TM.is64Bit()) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_areg_64;
- break;
- default:
- return false;
- }
- } else {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_areg;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
+ NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
+ NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
+ NVPTX::LD_f64_areg_64);
+ else
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
+ NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
+ NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), N1, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
}
if (!NVPTXLD)
@@ -925,7 +965,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
SDLoc DL(N);
SDNode *LD;
MemSDNode *MemSD = cast<MemSDNode>(N);
@@ -968,7 +1008,8 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
if (ExtensionType == ISD::SEXTLOAD)
FromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
- FromType = NVPTX::PTXLdStInstCode::Float;
+ FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
FromType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -987,111 +1028,67 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
EVT EltVT = N->getValueType(0);
+  // v8f16 is a special case. PTX doesn't have an ld.v8.f16
+ // instruction. Instead, we split the vector into v2f16 chunks and
+ // load them with ld.v4.b32.
+ if (EltVT == MVT::v2f16) {
+ assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
+ EltVT = MVT::i32;
+ FromType = NVPTX::PTXLdStInstCode::Untyped;
+ FromTypeWidth = 32;
+ }
+
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
+ NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
+ NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
+ NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_avar;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
+ NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
+ NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
+ NVPTX::LDV_f32_v4_avar, None);
break;
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_asi;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
+ NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
+ NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
+ NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_asi;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
+ NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
+ NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
+ NVPTX::LDV_f32_v4_asi, None);
break;
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
} else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (TM.is64Bit()) {
@@ -1099,46 +1096,19 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
+ NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
+ NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
+ NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
+ NVPTX::LDV_f64_v2_ari_64);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
+ NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
+ NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
+ NVPTX::LDV_f32_v4_ari_64, None);
break;
}
} else {
@@ -1146,101 +1116,47 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_ari;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
+ NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
+ NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
+ NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_ari;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
+ NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
+ NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
+ NVPTX::LDV_f32_v4_ari, None);
break;
}
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
} else {
if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
+ NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
+ NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
+ NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
+ NVPTX::LDV_f64_v2_areg_64);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
+ NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
+ NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
+ NVPTX::LDV_f32_v4_areg_64, None);
break;
}
} else {
@@ -1248,54 +1164,28 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_areg;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
+ NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
+ NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
+ NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
+ NVPTX::LDV_f64_v2_areg);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_areg;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
+ NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
+ NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
+ NVPTX::LDV_f32_v4_areg, None);
break;
}
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -1338,7 +1228,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Mem = cast<MemSDNode>(N);
}
- unsigned Opcode;
+ Optional<unsigned> Opcode;
SDLoc DL(N);
SDNode *LD;
SDValue Base, Offset, Addr;
@@ -1366,142 +1256,72 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
default:
return false;
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
break;
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
break;
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
break;
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (TM.is64Bit()) {
@@ -1510,139 +1330,68 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
break;
}
} else {
@@ -1651,146 +1400,75 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
break;
}
}
-
- SDValue Ops[] = { Base, Offset, Chain };
-
- LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
+ if (!Opcode)
+ return false;
+ SDValue Ops[] = {Base, Offset, Chain};
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
} else {
if (TM.is64Bit()) {
switch (N->getOpcode()) {
@@ -1798,139 +1476,68 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
break;
}
} else {
@@ -1939,145 +1546,75 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
break;
}
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -2151,24 +1688,23 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
// Vector Setting
MVT SimpleVT = StoreVT.getSimpleVT();
unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
- if (SimpleVT.isVector()) {
- unsigned num = SimpleVT.getVectorNumElements();
- if (num == 2)
- vecType = NVPTX::PTXLdStInstCode::V2;
- else if (num == 4)
- vecType = NVPTX::PTXLdStInstCode::V4;
- else
- return false;
- }
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
//
MVT ScalarVT = SimpleVT.getScalarType();
unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ if (SimpleVT.isVector()) {
+ assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
+ // v2f16 is stored using st.b32
+ toTypeWidth = 32;
+ }
+
unsigned int toType;
if (ScalarVT.isFloatingPoint())
- toType = NVPTX::PTXLdStInstCode::Float;
+ // f16 uses .b16 as its storage type.
+ toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
toType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -2178,173 +1714,73 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDValue Addr;
SDValue Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N2, Addr)) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_avar;
- break;
- default:
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
+ NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
+ NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
+ NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
: SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_asi;
- break;
- default:
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
+ NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
+ NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
+ NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
: SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (TM.is64Bit()) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_ari_64;
- break;
- default:
- return false;
- }
- } else {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_ari;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode = pickOpcodeForVT(
+ SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
+ NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
+ NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
+ else
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
+ NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
+ NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
+ NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
+ if (!Opcode)
+ return false;
+
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else {
- if (TM.is64Bit()) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_areg_64;
- break;
- default:
- return false;
- }
- } else {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_areg;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode =
+ pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
+ NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
+ NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
+ NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
+ else
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
+ NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
+ NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
+ NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
+ if (!Opcode)
+ return false;
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
}
if (!NVPTXST)
@@ -2361,7 +1797,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
SDLoc DL(N);
SDNode *ST;
EVT EltVT = Op1.getValueType();
@@ -2391,7 +1827,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
unsigned ToTypeWidth = ScalarVT.getSizeInBits();
unsigned ToType;
if (ScalarVT.isFloatingPoint())
- ToType = NVPTX::PTXLdStInstCode::Float;
+ ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
ToType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -2418,6 +1855,16 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
return false;
}
+  // v8f16 is a special case. PTX doesn't have an st.v8.f16
+ // instruction. Instead, we split the vector into v2f16 chunks and
+ // store them with st.v4.b32.
+ if (EltVT == MVT::v2f16) {
+    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
+ EltVT = MVT::i32;
+ ToType = NVPTX::PTXLdStInstCode::Untyped;
+ ToTypeWidth = 32;
+ }
+
StOps.push_back(getI32Imm(IsVolatile, DL));
StOps.push_back(getI32Imm(CodeAddrSpace, DL));
StOps.push_back(getI32Imm(VecType, DL));
@@ -2429,46 +1876,18 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
+ NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
+ NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
+ NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_avar;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
+ NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
+ NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
+ NVPTX::STV_f32_v4_avar, None);
break;
}
StOps.push_back(Addr);
@@ -2478,46 +1897,18 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_asi;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
+ NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
+ NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
+ NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_asi;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
+ NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
+ NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
+ NVPTX::STV_f32_v4_asi, None);
break;
}
StOps.push_back(Base);
@@ -2529,46 +1920,19 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
+ NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
+ NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
+ NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
+ NVPTX::STV_f64_v2_ari_64);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
+ NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
+ NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
+ NVPTX::STV_f32_v4_ari_64, None);
break;
}
} else {
@@ -2576,46 +1940,18 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_ari;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
+ NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
+ NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
+ NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_ari;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
+ NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
+ NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
+ NVPTX::STV_f32_v4_ari, None);
break;
}
}
@@ -2627,46 +1963,19 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
+ NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
+ NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
+ NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
+ NVPTX::STV_f64_v2_areg_64);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
+ NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
+ NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
+ NVPTX::STV_f32_v4_areg_64, None);
break;
}
} else {
@@ -2674,55 +1983,31 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_areg;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
+ NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
+ NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
+ NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
+ NVPTX::STV_f64_v2_areg);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_areg;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
+ NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
+ NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
+ NVPTX::STV_f32_v4_areg, None);
break;
}
}
StOps.push_back(N2);
}
+ if (!Opcode)
+ return false;
+
StOps.push_back(Chain);
- ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
+ ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
@@ -2757,87 +2042,36 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
EVT EltVT = Node->getValueType(0);
EVT MemVT = Mem->getMemoryVT();
- unsigned Opc = 0;
+ Optional<unsigned> Opcode;
switch (VecSize) {
default:
return false;
case 1:
- switch (MemVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opc = NVPTX::LoadParamMemI8;
- break;
- case MVT::i8:
- Opc = NVPTX::LoadParamMemI8;
- break;
- case MVT::i16:
- Opc = NVPTX::LoadParamMemI16;
- break;
- case MVT::i32:
- Opc = NVPTX::LoadParamMemI32;
- break;
- case MVT::i64:
- Opc = NVPTX::LoadParamMemI64;
- break;
- case MVT::f32:
- Opc = NVPTX::LoadParamMemF32;
- break;
- case MVT::f64:
- Opc = NVPTX::LoadParamMemF64;
- break;
- }
+ Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
+ NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
+ NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
+ NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
+ NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
break;
case 2:
- switch (MemVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opc = NVPTX::LoadParamMemV2I8;
- break;
- case MVT::i8:
- Opc = NVPTX::LoadParamMemV2I8;
- break;
- case MVT::i16:
- Opc = NVPTX::LoadParamMemV2I16;
- break;
- case MVT::i32:
- Opc = NVPTX::LoadParamMemV2I32;
- break;
- case MVT::i64:
- Opc = NVPTX::LoadParamMemV2I64;
- break;
- case MVT::f32:
- Opc = NVPTX::LoadParamMemV2F32;
- break;
- case MVT::f64:
- Opc = NVPTX::LoadParamMemV2F64;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
+ NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
+ NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
+ NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
+ NVPTX::LoadParamMemV2F64);
break;
case 4:
- switch (MemVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opc = NVPTX::LoadParamMemV4I8;
- break;
- case MVT::i8:
- Opc = NVPTX::LoadParamMemV4I8;
- break;
- case MVT::i16:
- Opc = NVPTX::LoadParamMemV4I16;
- break;
- case MVT::i32:
- Opc = NVPTX::LoadParamMemV4I32;
- break;
- case MVT::f32:
- Opc = NVPTX::LoadParamMemV4F32;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
+ NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
+ NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
+ NVPTX::LoadParamMemV4F32, None);
break;
}
+ if (!Opcode)
+ return false;
SDVTList VTs;
if (VecSize == 1) {
@@ -2856,7 +2090,7 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Ops.push_back(Chain);
Ops.push_back(Flag);
- ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
+ ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
return true;
}
@@ -2893,89 +2127,36 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
// Determine target opcode
// If we have an i1, use an 8-bit store. The lowering code in
// NVPTXISelLowering will have already emitted an upcast.
- unsigned Opcode = 0;
+  Optional<unsigned> Opcode;
switch (NumElts) {
default:
return false;
case 1:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreRetvalI8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreRetvalI8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreRetvalI16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreRetvalI32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreRetvalI64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreRetvalF32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreRetvalF64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
+ NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
+ NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
+ NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
break;
case 2:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreRetvalV2I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreRetvalV2I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreRetvalV2I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreRetvalV2I32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreRetvalV2I64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreRetvalV2F32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreRetvalV2F64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
+ NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
+ NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
+ NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
break;
case 4:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreRetvalV4I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreRetvalV4I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreRetvalV4I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreRetvalV4I32;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreRetvalV4F32;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
+ NVPTX::StoreRetvalV4I32, None,
+ NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
+ NVPTX::StoreRetvalV4F32, None);
break;
}
+ if (!Opcode)
+ return false;
- SDNode *Ret =
- CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -3024,88 +2205,36 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
// Determine target opcode
// If we have an i1, use an 8-bit store. The lowering code in
// NVPTXISelLowering will have already emitted an upcast.
- unsigned Opcode = 0;
+ Optional<unsigned> Opcode;
switch (N->getOpcode()) {
default:
switch (NumElts) {
default:
return false;
case 1:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreParamI8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreParamI8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreParamI16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreParamI32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreParamI64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreParamF32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreParamF64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreParamI8, NVPTX::StoreParamI16,
+ NVPTX::StoreParamI32, NVPTX::StoreParamI64,
+ NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
+ NVPTX::StoreParamF32, NVPTX::StoreParamF64);
break;
case 2:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreParamV2I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreParamV2I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreParamV2I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreParamV2I32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreParamV2I64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreParamV2F32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreParamV2F64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
+ NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
+ NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
+ NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
break;
case 4:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreParamV4I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreParamV4I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreParamV4I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreParamV4I32;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreParamV4F32;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
+ NVPTX::StoreParamV4I32, None,
+ NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
+ NVPTX::StoreParamV4F32, None);
break;
}
+ if (!Opcode)
+ return false;
break;
// Special case: if we have a sign-extend/zero-extend node, insert the
// conversion instruction first, and use that as the value operand to
@@ -3132,7 +2261,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *Ret =
- CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
+ CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 0591035a6aa8..8fc38e7c4612 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -34,6 +34,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool usePrecSqrtF32() const;
bool useF32FTZ() const;
bool allowFMA() const;
+ bool allowUnsafeFPMath() const;
public:
explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
@@ -69,6 +70,9 @@ private:
bool tryTextureIntrinsic(SDNode *N);
bool trySurfaceIntrinsic(SDNode *N);
bool tryBFE(SDNode *N);
+ bool tryConstantFP16(SDNode *N);
+ bool SelectSETP_F16X2(SDNode *N);
+ bool tryEXTRACT_VECTOR_ELEMENT(SDNode *N);
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7a760fd38d0f..4d06912054a2 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -79,6 +79,60 @@ FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
" 1: do it 2: do it aggressively"),
cl::init(2));
+static cl::opt<int> UsePrecDivF32(
+ "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+ " IEEE Compliant F32 div.rnd if available."),
+ cl::init(2));
+
+static cl::opt<bool> UsePrecSqrtF32(
+ "nvptx-prec-sqrtf32", cl::Hidden,
+ cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
+ cl::init(true));
+
+static cl::opt<bool> FtzEnabled(
+ "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
+ cl::init(false));
+
+int NVPTXTargetLowering::getDivF32Level() const {
+ if (UsePrecDivF32.getNumOccurrences() > 0) {
+ // If nvptx-prec-divf32=N is used on the command-line, always honor it
+ return UsePrecDivF32;
+ } else {
+ // Otherwise, use div.approx if fast math is enabled
+ if (getTargetMachine().Options.UnsafeFPMath)
+ return 0;
+ else
+ return 2;
+ }
+}
+
+bool NVPTXTargetLowering::usePrecSqrtF32() const {
+ if (UsePrecSqrtF32.getNumOccurrences() > 0) {
+ // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
+ return UsePrecSqrtF32;
+ } else {
+ // Otherwise, use sqrt.approx if fast math is enabled
+ return !getTargetMachine().Options.UnsafeFPMath;
+ }
+}
+
+bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
+ // TODO: Get rid of this flag; there can be only one way to do this.
+ if (FtzEnabled.getNumOccurrences() > 0) {
+ // If nvptx-f32ftz is used on the command-line, always honor it
+ return FtzEnabled;
+ } else {
+ const Function *F = MF.getFunction();
+ // Otherwise, check for an nvptx-f32ftz attribute on the function
+ if (F->hasFnAttribute("nvptx-f32ftz"))
+ return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
+ else
+ return false;
+ }
+}
+
static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
default:
@@ -92,6 +146,9 @@ static bool IsPTXVectorType(MVT VT) {
case MVT::v2i32:
case MVT::v4i32:
case MVT::v2i64:
+ case MVT::v2f16:
+ case MVT::v4f16:
+ case MVT::v8f16: // <4 x f16x2>
case MVT::v2f32:
case MVT::v4f32:
case MVT::v2f64:
@@ -116,13 +173,24 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
uint64_t Off = TempOffsets[i];
- if (VT.isVector())
- for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
- ValueVTs.push_back(VT.getVectorElementType());
+ // Split vectors into individual elements, except for v2f16, which
+ // we will pass as a single scalar.
+ if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ // Vectors with an even number of f16 elements will be passed to
+ // us as an array of v2f16 elements. We must match this so we
+ // stay in sync with Ins/Outs.
+ if (EltVT == MVT::f16 && NumElts % 2 == 0) {
+ EltVT = MVT::v2f16;
+ NumElts /= 2;
+ }
+ for (unsigned j = 0; j != NumElts; ++j) {
+ ValueVTs.push_back(EltVT);
if (Offsets)
- Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize());
+ Offsets->push_back(Off + j * EltVT.getStoreSize());
}
- else {
+ } else {
ValueVTs.push_back(VT);
if (Offsets)
Offsets->push_back(Off);
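
To illustrate the new splitting rule, a minimal sketch with hypothetical inputs (TLI, DL, and HalfX4Ty stand in for the target lowering, data layout, and a <4 x half> IR type available at the call sites):

    SmallVector<EVT, 4> VTs;
    SmallVector<uint64_t, 4> Offsets;
    // With the change above, an even-length f16 vector flattens to v2f16
    // pieces rather than scalars:
    //   VTs     == {MVT::v2f16, MVT::v2f16}
    //   Offsets == {0, 4}
    // A <3 x half> argument still flattens to three f16 pieces at {0, 2, 4}.
    ComputePTXValueVTs(TLI, DL, HalfX4Ty, VTs, &Offsets);
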
@@ -130,6 +198,125 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
}
}
+// Check whether we can merge loads/stores of some of the pieces of a
+// flattened function parameter or return value into a single vector
+// load/store.
+//
+// The flattened parameter is represented as a list of EVTs and
+// offsets, and the whole structure is aligned to ParamAlignment. This
+// function determines whether we can load/store pieces of the
+// parameter starting at index Idx using a single vectorized op of
+// size AccessSize. If so, it returns the number of param pieces
+// covered by the vector op. Otherwise, it returns 1.
+static unsigned CanMergeParamLoadStoresStartingAt(
+ unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
+ const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) {
+ assert(isPowerOf2_32(AccessSize) && "must be a power of 2!");
+
+ // Can't vectorize if param alignment is not sufficient.
+ if (AccessSize > ParamAlignment)
+ return 1;
+ // Can't vectorize if offset is not aligned.
+ if (Offsets[Idx] & (AccessSize - 1))
+ return 1;
+
+ EVT EltVT = ValueVTs[Idx];
+ unsigned EltSize = EltVT.getStoreSize();
+
+ // Element is too large to vectorize.
+ if (EltSize >= AccessSize)
+ return 1;
+
+ unsigned NumElts = AccessSize / EltSize;
+ // Can't vectorize if AccessSize is not a multiple of EltSize.
+ if (AccessSize != EltSize * NumElts)
+ return 1;
+
+ // We don't have enough elements to vectorize.
+ if (Idx + NumElts > ValueVTs.size())
+ return 1;
+
+ // PTX ISA can only deal with 2- and 4-element vector ops.
+ if (NumElts != 4 && NumElts != 2)
+ return 1;
+
+ for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
+ // Types do not match.
+ if (ValueVTs[j] != EltVT)
+ return 1;
+
+ // Elements are not contiguous.
+ if (Offsets[j] - Offsets[j - 1] != EltSize)
+ return 1;
+ }
+ // OK. We can vectorize ValueVTs[Idx..Idx+NumElts).
+ return NumElts;
+}
+
+// Flags for tracking per-element vectorization state of loads/stores
+// of a flattened function parameter or return value.
+enum ParamVectorizationFlags {
+ PVF_INNER = 0x0, // Middle elements of a vector.
+ PVF_FIRST = 0x1, // First element of the vector.
+ PVF_LAST = 0x2, // Last element of the vector.
+ // Scalar is effectively a 1-element vector.
+ PVF_SCALAR = PVF_FIRST | PVF_LAST
+};
+
+// Computes whether and how we can vectorize the loads/stores of a
+// flattened function parameter or return value.
+//
+// The flattened parameter is represented as the list of ValueVTs and
+// Offsets, and is aligned to ParamAlignment bytes. We return a vector
+// of the same size as ValueVTs indicating how each piece should be
+// loaded/stored (i.e. as a scalar, or as part of a vector
+// load/store).
+static SmallVector<ParamVectorizationFlags, 16>
+VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
+ const SmallVectorImpl<uint64_t> &Offsets,
+ unsigned ParamAlignment) {
+ // Set vector size to match ValueVTs and mark all elements as
+ // scalars by default.
+ SmallVector<ParamVectorizationFlags, 16> VectorInfo;
+ VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);
+
+ // Check what we can vectorize using 128/64/32-bit accesses.
+ for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
+ // Skip elements we've already processed.
+ assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
+ for (unsigned AccessSize : {16, 8, 4, 2}) {
+ unsigned NumElts = CanMergeParamLoadStoresStartingAt(
+ I, AccessSize, ValueVTs, Offsets, ParamAlignment);
+ // Mark vectorized elements.
+ switch (NumElts) {
+ default:
+ llvm_unreachable("Unexpected return value");
+ case 1:
+ // Can't vectorize using this size, try next smaller size.
+ continue;
+ case 2:
+ assert(I + 1 < E && "Not enough elements.");
+ VectorInfo[I] = PVF_FIRST;
+ VectorInfo[I + 1] = PVF_LAST;
+ I += 1;
+ break;
+ case 4:
+ assert(I + 3 < E && "Not enough elements.");
+ VectorInfo[I] = PVF_FIRST;
+ VectorInfo[I + 1] = PVF_INNER;
+ VectorInfo[I + 2] = PVF_INNER;
+ VectorInfo[I + 3] = PVF_LAST;
+ I += 3;
+ break;
+ }
+ // Break out of the inner loop because we've already succeeded
+ // using the largest possible AccessSize.
+ break;
+ }
+ }
+ return VectorInfo;
+}
+
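
As a worked illustration of these flags, consider a parameter flattened to four f32 pieces followed by an i32, with 16-byte alignment (a sketch with made-up inputs): the four contiguous f32 pieces merge into one 4-element access and the trailing i32 stays scalar.

    SmallVector<EVT, 8> VTs = {MVT::f32, MVT::f32, MVT::f32, MVT::f32,
                               MVT::i32};
    SmallVector<uint64_t, 8> Offsets = {0, 4, 8, 12, 16};
    auto Info = VectorizePTXValueVTs(VTs, Offsets, /*ParamAlignment=*/16);
    // Info == {PVF_FIRST, PVF_INNER, PVF_INNER, PVF_LAST, PVF_SCALAR},
    // i.e. one v4 load/store op for elements 0..3 and one scalar op for
    // element 4.
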
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
const NVPTXSubtarget &STI)
@@ -158,14 +345,32 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
else
setSchedulingPreference(Sched::Source);
+ auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
+ LegalizeAction NoF16Action) {
+ setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
+ };
+
addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
+ addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
+ addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);
+
+ // Conversion to/from FP16/FP16x2 is always legal.
+ setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+
+ setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
+ setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
// Operations not directly supported by NVPTX.
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v2f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
@@ -173,6 +378,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v2f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -195,6 +402,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+
if (STI.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
@@ -259,6 +469,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// This is legal in NVPTX
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
// TRAP can be lowered to PTX trap
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -278,15 +489,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// Custom handling for i8 intrinsics
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::CTLZ, MVT::i16, Legal);
- setOperationAction(ISD::CTLZ, MVT::i32, Legal);
- setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+ for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
+ setOperationAction(ISD::SMIN, Ty, Legal);
+ setOperationAction(ISD::SMAX, Ty, Legal);
+ setOperationAction(ISD::UMIN, Ty, Legal);
+ setOperationAction(ISD::UMAX, Ty, Legal);
+
+ setOperationAction(ISD::CTPOP, Ty, Legal);
+ setOperationAction(ISD::CTLZ, Ty, Legal);
+ }
+
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
- setOperationAction(ISD::CTPOP, MVT::i16, Legal);
- setOperationAction(ISD::CTPOP, MVT::i32, Legal);
- setOperationAction(ISD::CTPOP, MVT::i64, Legal);
// PTX does not directly support SELP of i1, so promote to i32 first
setOperationAction(ISD::SELECT, MVT::i1, Custom);
@@ -301,28 +516,60 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SREM);
setTargetDAGCombine(ISD::UREM);
- // Library functions. These default to Expand, but we have instructions
- // for them.
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ // setcc for f16x2 needs special handling to prevent the legalizer's
+ // attempt to scalarize it, because v2i1 is not legal.
+ if (STI.allowFP16Math())
+ setTargetDAGCombine(ISD::SETCC);
+
+ // Promote fp16 arithmetic if fp16 hardware isn't available or the
+ // user passed --nvptx-no-fp16-math. The flag is useful because,
+ // although sm_53+ GPUs have some sort of FP16 support in
+ // hardware, only sm_53 and sm_60 have a full implementation. Others
+ // have only a token amount of hardware and are likely to run faster
+ // by using fp32 units instead.
+ for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
+ setFP16OperationAction(Op, MVT::f16, Legal, Promote);
+ setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
+ }
+
+ // There's no neg.f16 instruction. Expand to (0-x).
+ setOperationAction(ISD::FNEG, MVT::f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f16, Expand);
+
+ // (would be) Library functions.
+
+ // These map to conversion instructions for scalar FP types.
+ for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
+ ISD::FROUND, ISD::FTRUNC}) {
+ setOperationAction(Op, MVT::f16, Legal);
+ setOperationAction(Op, MVT::f32, Legal);
+ setOperationAction(Op, MVT::f64, Legal);
+ setOperationAction(Op, MVT::v2f16, Expand);
+ }
+
+ // 'Expand' implements FCOPYSIGN without calling an external library.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+
+ // These map to corresponding instructions for f32/f64. f16 must be
+ // promoted to f32. v2f16 is expanded to f16, which is then promoted
+ // to f32.
+ for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS,
+ ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) {
+ setOperationAction(Op, MVT::f16, Promote);
+ setOperationAction(Op, MVT::f32, Legal);
+ setOperationAction(Op, MVT::f64, Legal);
+ setOperationAction(Op, MVT::v2f16, Expand);
+ }
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
// No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
// No FPOW or FREM in PTX.
@@ -434,6 +681,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::FUN_SHFR_CLAMP";
case NVPTXISD::IMAD:
return "NVPTXISD::IMAD";
+ case NVPTXISD::SETP_F16X2:
+ return "NVPTXISD::SETP_F16X2";
case NVPTXISD::Dummy:
return "NVPTXISD::Dummy";
case NVPTXISD::MUL_WIDE_SIGNED:
@@ -932,10 +1181,60 @@ TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
return TypeSplitVector;
-
+ if (VT == MVT::v2f16)
+ return TypeLegal;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &ExtraSteps,
+ bool &UseOneConst,
+ bool Reciprocal) const {
+ if (!(Enabled == ReciprocalEstimate::Enabled ||
+ (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
+ return SDValue();
+
+ if (ExtraSteps == ReciprocalEstimate::Unspecified)
+ ExtraSteps = 0;
+
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
+ bool Ftz = useF32FTZ(DAG.getMachineFunction());
+
+ auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(IID, DL, MVT::i32), Operand);
+ };
+
+ // The sqrt and rsqrt refinement processes assume we always start out with an
+ // approximation of the rsqrt. Therefore, if we're going to do any refinement
+ // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing
+ // any refinement, we must return a regular sqrt.
+ if (Reciprocal || ExtraSteps > 0) {
+ if (VT == MVT::f32)
+ return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
+ : Intrinsic::nvvm_rsqrt_approx_f);
+ else if (VT == MVT::f64)
+ return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
+ else
+ return SDValue();
+ } else {
+ if (VT == MVT::f32)
+ return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
+ : Intrinsic::nvvm_sqrt_approx_f);
+ else {
+ // There's no sqrt.approx.f64 instruction, so we emit
+ // reciprocal(rsqrt(x)). This is faster than
+ // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain
+ // x * rsqrt(x).)
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
+ MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
+ }
+ }
+}
+
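
The f64 branch of getSqrtEstimate above rests on the identity sqrt(x) = rcp(rsqrt(x)). A small host-side model of that algebra, with plain doubles standing in for rsqrt.approx.d and rcp.approx.ftz.d (sketch only, not part of the patch):

    #include <cmath>
    // Models rcp(rsqrt(x)): the reciprocal of the reciprocal square root.
    static double SqrtViaRcpRsqrt(double X) {
      double Rsqrt = 1.0 / std::sqrt(X); // stands in for rsqrt.approx.d
      return 1.0 / Rsqrt;                // stands in for rcp.approx.ftz.d
    }
    // SqrtViaRcpRsqrt(4.0) == 2.0, up to the error of the approximations.
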
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -967,19 +1266,21 @@ std::string NVPTXTargetLowering::getPrototype(
unsigned size = 0;
if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
- if (size < 32)
- size = 32;
} else {
assert(retTy->isFloatingPointTy() &&
"Floating point type expected here");
size = retTy->getPrimitiveSizeInBits();
}
+ // PTX ABI requires all scalar return values to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type in
+ // PTX, so its size must be adjusted here, too.
+ if (size < 32)
+ size = 32;
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << PtrVT.getSizeInBits() << " _";
- } else if ((retTy->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(retTy)) {
+ } else if (retTy->isAggregateType() || retTy->isVectorTy()) {
auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
O << ".param .align " << retAlignment << " .b8 _["
<< DL.getTypeAllocSize(retTy) << "]";
@@ -1018,7 +1319,7 @@ std::string NVPTXTargetLowering::getPrototype(
OIdx += len - 1;
continue;
}
- // i8 types in IR will be i16 types in SDAG
+ // i8 types in IR will be i16 types in SDAG
assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
(getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
"type mismatch between callee prototype and arguments");
@@ -1028,8 +1329,13 @@ std::string NVPTXTargetLowering::getPrototype(
sz = cast<IntegerType>(Ty)->getBitWidth();
if (sz < 32)
sz = 32;
- } else if (isa<PointerType>(Ty))
+ } else if (isa<PointerType>(Ty)) {
sz = PtrVT.getSizeInBits();
+ } else if (Ty->isHalfTy())
+ // PTX ABI requires all scalar parameters to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type
+ // in PTX, so its size must be adjusted here, too.
+ sz = 32;
else
sz = Ty->getPrimitiveSizeInBits();
O << ".param .b" << sz << " ";
@@ -1113,21 +1419,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
ArgListTy &Args = CLI.getArgs();
- Type *retTy = CLI.RetTy;
+ Type *RetTy = CLI.RetTy;
ImmutableCallSite *CS = CLI.CS;
+ const DataLayout &DL = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- auto &DL = MF.getDataLayout();
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, dl, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(
+ Chain, DAG.getIntPtrConstant(uniqueCallSite, dl, true), dl);
SDValue InFlag = Chain.getValue(1);
unsigned paramCount = 0;
@@ -1148,240 +1451,124 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Type *Ty = Args[i].Ty;
if (!Outs[OIdx].Flags.isByVal()) {
- if (Ty->isAggregateType()) {
- // aggregate
- SmallVector<EVT, 16> vtparts;
- SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
- 0);
-
- unsigned align =
- getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
+ unsigned ArgAlign =
+ getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
+ unsigned AllocSize = DL.getTypeAllocSize(Ty);
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ bool NeedAlign; // Does argument declaration specify alignment?
+ if (Ty->isAggregateType() || Ty->isVectorTy()) {
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = DL.getTypeAllocSize(Ty);
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
- MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
- InFlag = Chain.getValue(1);
- for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- EVT elemtype = vtparts[j];
- unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- SDValue StVal = OutVals[OIdx];
- if (elemtype.getSizeInBits() < 16) {
- StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(Offsets[j], dl, MVT::i32),
- StVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, CopyParamOps,
- elemtype, MachinePointerInfo(),
- ArgAlign);
- InFlag = Chain.getValue(1);
- ++OIdx;
+ NeedAlign = true;
+ } else {
+ // declare .param .b<size> .param<n>;
+ if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) {
+ // PTX ABI requires integral types to be at least 32 bits in
+ // size. FP16 is loaded/stored using i16, so it's handled
+ // here as well.
+ AllocSize = 4;
}
- if (vtparts.size() > 0)
- --OIdx;
- ++paramCount;
- continue;
+ SDValue DeclareScalarParamOps[] = {
+ Chain, DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(AllocSize * 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareScalarParamOps);
+ NeedAlign = false;
}
- if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(DL, Ty);
- unsigned align =
- getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
- // declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = DL.getTypeAllocSize(Ty);
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(align, dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32),
- InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps);
- InFlag = Chain.getValue(1);
- unsigned NumElts = ObjectVT.getVectorNumElements();
- EVT EltVT = ObjectVT.getVectorElementType();
- EVT MemVT = EltVT;
- bool NeedExtend = false;
- if (EltVT.getSizeInBits() < 16) {
- NeedExtend = true;
- EltVT = MVT::i16;
+ InFlag = Chain.getValue(1);
+
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
+ // than 32 bits are sign extended or zero extended, depending on
+ // whether they are signed or unsigned types. This case applies
+ // only to scalar parameters and not to aggregate values.
+ bool ExtendIntegerParam =
+ Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
+
+ auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
+ SmallVector<SDValue, 6> StoreOperands;
+ for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
+ // New store.
+ if (VectorInfo[j] & PVF_FIRST) {
+ assert(StoreOperands.empty() && "Unfinished preceding store.");
+ StoreOperands.push_back(Chain);
+ StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
+ StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32));
}
- // V1 store
- if (NumElts == 1) {
- SDValue Elt = OutVals[OIdx++];
- if (NeedExtend)
- Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
-
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), Elt,
- InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, CopyParamOps,
- MemVT, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- } else if (NumElts == 2) {
- SDValue Elt0 = OutVals[OIdx++];
- SDValue Elt1 = OutVals[OIdx++];
- if (NeedExtend) {
- Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
- Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
+ EVT EltVT = VTs[j];
+ SDValue StVal = OutVals[OIdx];
+ if (ExtendIntegerParam) {
+ assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
+ // zext/sext to i32
+ StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND,
+ dl, MVT::i32, StVal);
+ } else if (EltVT.getSizeInBits() < 16) {
+ // Use 16-bit registers for small stores as it's the
+ // smallest general purpose register size supported by NVPTX.
+ StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
+ }
+
+ // Record the value to store.
+ StoreOperands.push_back(StVal);
+
+ if (VectorInfo[j] & PVF_LAST) {
+ unsigned NumElts = StoreOperands.size() - 3;
+ NVPTXISD::NodeType Op;
+ switch (NumElts) {
+ case 1:
+ Op = NVPTXISD::StoreParam;
+ break;
+ case 2:
+ Op = NVPTXISD::StoreParamV2;
+ break;
+ case 4:
+ Op = NVPTXISD::StoreParamV4;
+ break;
+ default:
+ llvm_unreachable("Invalid vector info.");
}
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), Elt0,
- Elt1, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
- CopyParamVTs, CopyParamOps,
- MemVT, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- } else {
- unsigned curOffset = 0;
- // V4 stores
- // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
- // the
- // vector will be expanded to a power of 2 elements, so we know we can
- // always round up to the next multiple of 4 when creating the vector
- // stores.
- // e.g. 4 elem => 1 st.v4
- // 6 elem => 2 st.v4
- // 8 elem => 2 st.v4
- // 11 elem => 3 st.v4
- unsigned VecSize = 4;
- if (EltVT.getSizeInBits() == 64)
- VecSize = 2;
-
- // This is potentially only part of a vector, so assume all elements
- // are packed together.
- unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
-
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- // Get values
- SDValue StoreVal;
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
- Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));
-
- unsigned Opc = NVPTXISD::StoreParamV2;
-
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- Ops.push_back(StoreVal);
-
- if (i + 1 < NumElts) {
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(EltVT);
- }
- Ops.push_back(StoreVal);
-
- if (VecSize == 4) {
- Opc = NVPTXISD::StoreParamV4;
- if (i + 2 < NumElts) {
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(EltVT);
- }
- Ops.push_back(StoreVal);
-
- if (i + 3 < NumElts) {
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(EltVT);
- }
- Ops.push_back(StoreVal);
- }
+ StoreOperands.push_back(InFlag);
- Ops.push_back(InFlag);
+ // Adjust the type of the store op if we've extended the scalar
+ // parameter value.
+ EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
+ unsigned EltAlign =
+ NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0;
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
- MemVT, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- curOffset += PerStoreOffset;
- }
+ Chain = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
+ TheStoreType, MachinePointerInfo(), EltAlign);
+ InFlag = Chain.getValue(1);
+
+ // Cleanup.
+ StoreOperands.clear();
}
- ++paramCount;
- --OIdx;
- continue;
- }
- // Plain scalar
- // for ABI, declare .param .b<size> .param<n>;
- unsigned sz = VT.getSizeInBits();
- bool needExtend = false;
- if (VT.isInteger()) {
- if (sz < 16)
- needExtend = true;
- if (sz < 32)
- sz = 32;
+ ++OIdx;
}
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
- DeclareParamOps);
- InFlag = Chain.getValue(1);
- SDValue OutV = OutVals[OIdx];
- if (needExtend) {
- // zext/sext i1 to i16
- unsigned opc = ISD::ZERO_EXTEND;
- if (Outs[OIdx].Flags.isSExt())
- opc = ISD::SIGN_EXTEND;
- OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), OutV,
- InFlag };
-
- unsigned opcode = NVPTXISD::StoreParam;
- if (Outs[OIdx].Flags.isZExt() && VT.getSizeInBits() < 32)
- opcode = NVPTXISD::StoreParamU32;
- else if (Outs[OIdx].Flags.isSExt() && VT.getSizeInBits() < 32)
- opcode = NVPTXISD::StoreParamS32;
- Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
- VT, MachinePointerInfo());
-
- InFlag = Chain.getValue(1);
+ assert(StoreOperands.empty() && "Unfinished parameter store.");
+ if (VTs.size() > 0)
+ --OIdx;
++paramCount;
continue;
}
- // struct or vector
- SmallVector<EVT, 16> vtparts;
+
+ // ByVal arguments
+ SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
- vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
@@ -1402,11 +1589,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
- for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- EVT elemtype = vtparts[j];
+ for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
+ EVT elemtype = VTs[j];
int curOffset = Offsets[j];
unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ auto PtrVT = getPointerTy(DL);
SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
DAG.getConstant(curOffset, dl, PtrVT));
SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
@@ -1434,18 +1621,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle Result
if (Ins.size() > 0) {
SmallVector<EVT, 16> resvtparts;
- ComputeValueVTs(*this, DL, retTy, resvtparts);
+ ComputeValueVTs(*this, DL, RetTy, resvtparts);
// Declare
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
- unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
+ unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
// Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
// these three types to match the logic in
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
// Plus, this behavior is consistent with nvcc's.
- if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
- retTy->isPointerTy()) {
+ if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() ||
+ RetTy->isPointerTy()) {
// Scalar needs to be at least 32bit wide
if (resultsz < 32)
resultsz = 32;
@@ -1457,7 +1644,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
- retAlignment = getArgumentAlignment(Callee, CS, retTy, 0, DL);
+ retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareRetOps[] = { Chain,
DAG.getConstant(retAlignment, dl, MVT::i32),
@@ -1478,8 +1665,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto =
- getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
+ std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1544,175 +1730,84 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
- if (retTy && retTy->isVectorTy()) {
- EVT ObjectVT = getValueType(DL, retTy);
- unsigned NumElts = ObjectVT.getVectorNumElements();
- EVT EltVT = ObjectVT.getVectorElementType();
- assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
- ObjectVT) == NumElts &&
- "Vector was not scalarized");
- unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 8;
-
- if (NumElts == 1) {
- // Just a simple load
- SmallVector<EVT, 4> LoadRetVTs;
- if (EltVT == MVT::i1 || EltVT == MVT::i8) {
- // If loading i1/i8 result, generate
- // load.b8 i16
- // if i1
- // trunc i16 to i1
- LoadRetVTs.push_back(MVT::i16);
- } else
- LoadRetVTs.push_back(EltVT);
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- NVPTXISD::LoadParam, dl,
- DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
- Chain = retval.getValue(1);
- InFlag = retval.getValue(2);
- SDValue Ret0 = retval;
- if (needTruncate)
- Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
- InVals.push_back(Ret0);
- } else if (NumElts == 2) {
- // LoadV2
- SmallVector<EVT, 4> LoadRetVTs;
- if (EltVT == MVT::i1 || EltVT == MVT::i8) {
- // If loading i1/i8 result, generate
- // load.b8 i16
- // if i1
- // trunc i16 to i1
- LoadRetVTs.push_back(MVT::i16);
- LoadRetVTs.push_back(MVT::i16);
- } else {
- LoadRetVTs.push_back(EltVT);
- LoadRetVTs.push_back(EltVT);
- }
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- NVPTXISD::LoadParamV2, dl,
- DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
- Chain = retval.getValue(2);
- InFlag = retval.getValue(3);
- SDValue Ret0 = retval.getValue(0);
- SDValue Ret1 = retval.getValue(1);
- if (needTruncate) {
- Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
- InVals.push_back(Ret0);
- Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
- InVals.push_back(Ret1);
- } else {
- InVals.push_back(Ret0);
- InVals.push_back(Ret1);
- }
- } else {
- // Split into N LoadV4
- unsigned Ofst = 0;
- unsigned VecSize = 4;
- unsigned Opc = NVPTXISD::LoadParamV4;
- if (EltVT.getSizeInBits() == 64) {
- VecSize = 2;
- Opc = NVPTXISD::LoadParamV2;
- }
- EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- SmallVector<EVT, 8> LoadRetVTs;
- if (EltVT == MVT::i1 || EltVT == MVT::i8) {
- // If loading i1/i8 result, generate
- // load.b8 i16
- // if i1
- // trunc i16 to i1
- for (unsigned j = 0; j < VecSize; ++j)
- LoadRetVTs.push_back(MVT::i16);
- } else {
- for (unsigned j = 0; j < VecSize; ++j)
- LoadRetVTs.push_back(EltVT);
- }
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- Opc, dl, DAG.getVTList(LoadRetVTs),
- LoadRetOps, EltVT, MachinePointerInfo());
- if (VecSize == 2) {
- Chain = retval.getValue(2);
- InFlag = retval.getValue(3);
- } else {
- Chain = retval.getValue(4);
- InFlag = retval.getValue(5);
- }
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
+ assert(VTs.size() == Ins.size() && "Bad value decomposition");
+
+ unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
+
+ SmallVector<EVT, 6> LoadVTs;
+ int VecIdx = -1; // Index of the first element of the vector.
+
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
+ // 32 bits are sign extended or zero extended, depending on whether
+ // they are signed or unsigned types.
+ bool ExtendIntegerRetVal =
+ RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
+
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ bool needTruncate = false;
+ EVT TheLoadType = VTs[i];
+ EVT EltType = Ins[i].VT;
+ unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]);
+ if (ExtendIntegerRetVal) {
+ TheLoadType = MVT::i32;
+ EltType = MVT::i32;
+ needTruncate = true;
+ } else if (TheLoadType.getSizeInBits() < 16) {
+ if (VTs[i].isInteger())
+ needTruncate = true;
+ EltType = MVT::i16;
+ }
- for (unsigned j = 0; j < VecSize; ++j) {
- if (i + j >= NumElts)
- break;
- SDValue Elt = retval.getValue(j);
- if (needTruncate)
- Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
- InVals.push_back(Elt);
- }
- Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
- }
+ // Record index of the very first element of the vector.
+ if (VectorInfo[i] & PVF_FIRST) {
+ assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
+ VecIdx = i;
}
- } else {
- SmallVector<EVT, 16> VTs;
- SmallVector<uint64_t, 16> Offsets;
- auto &DL = DAG.getDataLayout();
- ComputePTXValueVTs(*this, DL, retTy, VTs, &Offsets, 0);
- assert(VTs.size() == Ins.size() && "Bad value decomposition");
- unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0, DL);
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- unsigned sz = VTs[i].getSizeInBits();
- unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
- bool needTruncate = false;
- if (VTs[i].isInteger() && sz < 8) {
- sz = 8;
- needTruncate = true;
+
+ LoadVTs.push_back(EltType);
+
+ if (VectorInfo[i] & PVF_LAST) {
+ unsigned NumElts = LoadVTs.size();
+ LoadVTs.push_back(MVT::Other);
+ LoadVTs.push_back(MVT::Glue);
+ NVPTXISD::NodeType Op;
+ switch (NumElts) {
+ case 1:
+ Op = NVPTXISD::LoadParam;
+ break;
+ case 2:
+ Op = NVPTXISD::LoadParamV2;
+ break;
+ case 4:
+ Op = NVPTXISD::LoadParamV4;
+ break;
+ default:
+ llvm_unreachable("Invalid vector info.");
}
- SmallVector<EVT, 4> LoadRetVTs;
- EVT TheLoadType = VTs[i];
- if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
- // This is for integer types only, and specifically not for
- // aggregates.
- LoadRetVTs.push_back(MVT::i32);
- TheLoadType = MVT::i32;
- needTruncate = true;
- } else if (sz < 16) {
- // If loading i1/i8 result, generate
- // load i8 (-> i16)
- // trunc i16 to i1/i8
-
- // FIXME: Do we need to set needTruncate to true here, too? We could
- // not figure out what this branch is for in D17872, so we left it
- // alone. The comment above about loading i1/i8 may be wrong, as the
- // branch above seems to cover integers of size < 32.
- LoadRetVTs.push_back(MVT::i16);
- } else
- LoadRetVTs.push_back(Ins[i].VT);
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
-
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(Offsets[i], dl, MVT::i32),
- InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- NVPTXISD::LoadParam, dl,
- DAG.getVTList(LoadRetVTs), LoadRetOps,
- TheLoadType, MachinePointerInfo(), AlignI);
- Chain = retval.getValue(1);
- InFlag = retval.getValue(2);
- SDValue Ret0 = retval.getValue(0);
- if (needTruncate)
- Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
- InVals.push_back(Ret0);
+ SDValue LoadOperands[] = {
+ Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag};
+ SDValue RetVal = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
+ MachinePointerInfo(), EltAlign);
+
+ for (unsigned j = 0; j < NumElts; ++j) {
+ SDValue Ret = RetVal.getValue(j);
+ if (needTruncate)
+ Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
+ InVals.push_back(Ret);
+ }
+ Chain = RetVal.getValue(NumElts);
+ InFlag = RetVal.getValue(NumElts + 1);
+
+ // Cleanup
+ VecIdx = -1;
+ LoadVTs.clear();
}
}
}
@@ -1752,6 +1847,55 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
}
+// We can init constant f16x2 with a single .b32 move. Normally it
+// would get lowered as two constant loads and vector-packing move.
+// mov.b16 %h1, 0x4000;
+// mov.b16 %h2, 0x3C00;
+// mov.b32 %hh2, {%h2, %h1};
+// Instead we want just a constant move:
+// mov.b32 %hh2, 0x40003C00
+//
+// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0
+// generates good SASS in both cases.
+SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!(Op->getValueType(0) == MVT::v2f16 &&
+ isa<ConstantFPSDNode>(Op->getOperand(0)) &&
+ isa<ConstantFPSDNode>(Op->getOperand(1))))
+ return Op;
+
+ APInt E0 =
+ cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt();
+ APInt E1 =
+ cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
+ SDValue Const =
+ DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const);
+}
+
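
A concrete instance of the packing above (the fp16 bit patterns are the standard IEEE half encodings; the snippet is illustrative, not from the patch): building the constant vector <half 1.0, half 2.0> places element 1 in the high halfword.

    // 1.0h -> 0x3C00, 2.0h -> 0x4000. E1.zext(32).shl(16) | E0.zext(32)
    // from the code above therefore yields:
    uint32_t Packed = (0x4000u << 16) | 0x3C00u; // == 0x40003C00
    // exactly the immediate in "mov.b32 %hh2, 0x40003C00" shown above.
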
+SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Index = Op->getOperand(1);
+ // Constant index will be matched by tablegen.
+ if (isa<ConstantSDNode>(Index.getNode()))
+ return Op;
+
+ // Extract individual elements and select one of them.
+ SDValue Vector = Op->getOperand(0);
+ EVT VectorVT = Vector.getValueType();
+ assert(VectorVT == MVT::v2f16 && "Unexpected vector type.");
+ EVT EltVT = VectorVT.getVectorElementType();
+
+ SDLoc dl(Op.getNode());
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
+ DAG.getIntPtrConstant(1, dl));
+ return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
+ ISD::CondCode::SETEQ);
+}
+
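
For a non-constant index, the lowering above extracts both lanes and selects between them; a host-side model of the resulting dataflow, with float standing in for half (illustrative sketch, not part of the patch):

    // Models getSelectCC(Index, 0, E0, E1, SETEQ) over the two extracts.
    static float ExtractDynamicLane(const float V[2], unsigned Index) {
      float E0 = V[0];              // EXTRACT_VECTOR_ELT, lane 0
      float E1 = V[1];              // EXTRACT_VECTOR_ELT, lane 1
      return Index == 0 ? E0 : E1;  // compare-and-select picks the lane
    }
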
/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
/// amount, or
@@ -1885,8 +2029,11 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_W_CHAIN:
return Op;
case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return Op;
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::STORE:
@@ -1924,8 +2071,21 @@ SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
- else
- return SDValue();
+
+ // v2f16 is legal, so we can't rely on the legalizer to handle unaligned
+ // loads; we have to handle them here.
+ if (Op.getValueType() == MVT::v2f16) {
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ EVT MemVT = Load->getMemoryVT();
+ if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ Load->getAddressSpace(), Load->getAlignment())) {
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+ return DAG.getMergeValues(Ops, SDLoc(Op));
+ }
+ }
+
+ return SDValue();
}
// v = ld i1* addr
@@ -1951,13 +2111,23 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
}
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- EVT ValVT = Op.getOperand(1).getValueType();
- if (ValVT == MVT::i1)
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT VT = Store->getMemoryVT();
+
+ if (VT == MVT::i1)
return LowerSTOREi1(Op, DAG);
- else if (ValVT.isVector())
+
+ // v2f16 is legal, so we can't rely on the legalizer to handle unaligned
+ // stores; we have to handle them here.
+ if (VT == MVT::v2f16 &&
+ !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ Store->getAddressSpace(), Store->getAlignment()))
+ return expandUnalignedStore(Store, DAG);
+
+ if (VT.isVector())
return LowerSTOREVector(Op, DAG);
- else
- return SDValue();
+
+ return SDValue();
}
SDValue
@@ -1980,12 +2150,15 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
case MVT::v2i16:
case MVT::v2i32:
case MVT::v2i64:
+ case MVT::v2f16:
case MVT::v2f32:
case MVT::v2f64:
case MVT::v4i8:
case MVT::v4i16:
case MVT::v4i32:
+ case MVT::v4f16:
case MVT::v4f32:
+ case MVT::v8f16: // <4 x f16x2>
// This is a "native" vector type
break;
}
@@ -2016,6 +2189,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
if (EltVT.getSizeInBits() < 16)
NeedExt = true;
+ bool StoreF16x2 = false;
switch (NumElts) {
default:
return SDValue();
@@ -2025,6 +2199,14 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
case 4:
Opcode = NVPTXISD::StoreV4;
break;
+ case 8:
+ // v8f16 is a special case. PTX doesn't have an st.v8.f16
+ // instruction. Instead, we split the vector into v2f16 chunks and
+ // store them with st.v4.b32.
+ assert(EltVT == MVT::f16 && "Wrong type for the vector.");
+ Opcode = NVPTXISD::StoreV4;
+ StoreF16x2 = true;
+ break;
}
SmallVector<SDValue, 8> Ops;
@@ -2032,23 +2214,36 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// First is the chain
Ops.push_back(N->getOperand(0));
- // Then the split values
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
- DAG.getIntPtrConstant(i, DL));
- if (NeedExt)
- ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
- Ops.push_back(ExtVal);
+ if (StoreF16x2) {
+ // Combine f16,f16 -> v2f16
+ NumElts /= 2;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
+ DAG.getIntPtrConstant(i * 2, DL));
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
+ DAG.getIntPtrConstant(i * 2 + 1, DL));
+ SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1);
+ Ops.push_back(V2);
+ }
+ } else {
+ // Then the split values
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+ DAG.getIntPtrConstant(i, DL));
+ if (NeedExt)
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ Ops.push_back(ExtVal);
+ }
}
// Then any remaining arguments
Ops.append(N->op_begin() + 2, N->op_end());
- SDValue NewSt = DAG.getMemIntrinsicNode(
- Opcode, DL, DAG.getVTList(MVT::Other), Ops,
- MemSD->getMemoryVT(), MemSD->getMemOperand());
+ SDValue NewSt =
+ DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
- //return DCI.CombineTo(N, NewSt, true);
+ // return DCI.CombineTo(N, NewSt, true);
return NewSt;
}
@@ -2120,7 +2315,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
auto PtrVT = getPointerTy(DAG.getDataLayout());
const Function *F = MF.getFunction();
- const AttributeSet &PAL = F->getAttributes();
+ const AttributeList &PAL = F->getAttributes();
const TargetLowering *TLI = STI.getTargetLowering();
SDValue Root = DAG.getRoot();
@@ -2200,177 +2395,80 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
- if (Ty->isAggregateType()) {
- SmallVector<EVT, 16> vtparts;
- SmallVector<uint64_t, 16> offsets;
+ if (!PAL.hasParamAttribute(i, Attribute::ByVal)) {
+ bool aggregateIsPacked = false;
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ aggregateIsPacked = STy->isPacked();
- // NOTE: Here, we lose the ability to issue vector loads for vectors
- // that are a part of a struct. This should be investigated in the
- // future.
- ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
- 0);
- assert(vtparts.size() > 0 && "empty aggregate type not expected");
- bool aggregateIsPacked = false;
- if (StructType *STy = dyn_cast<StructType>(Ty))
- aggregateIsPacked = STy->isPacked();
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
+ assert(VTs.size() > 0 && "Unexpected empty type.");
+ auto VectorInfo =
+ VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty));
- SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
- for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
- ++parti) {
- EVT partVT = vtparts[parti];
- Value *srcValue = Constant::getNullValue(
- PointerType::get(partVT.getTypeForEVT(F->getContext()),
- ADDRESS_SPACE_PARAM));
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
- DAG.getConstant(offsets[parti], dl, PtrVT));
- unsigned partAlign = aggregateIsPacked
- ? 1
- : DL.getABITypeAlignment(
- partVT.getTypeForEVT(F->getContext()));
- SDValue p;
- if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
- ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
- ISD::SEXTLOAD : ISD::ZEXTLOAD;
- p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
- MachinePointerInfo(srcValue), partVT, partAlign);
- } else {
- p = DAG.getLoad(partVT, dl, Root, srcAddr,
- MachinePointerInfo(srcValue), partAlign);
- }
- if (p.getNode())
- p.getNode()->setIROrder(idx + 1);
- InVals.push_back(p);
- ++InsIdx;
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
+ int VecIdx = -1; // Index of the first element of the current vector.
+ for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
+ if (VectorInfo[parti] & PVF_FIRST) {
+ assert(VecIdx == -1 && "Orphaned vector.");
+ VecIdx = parti;
}
- if (vtparts.size() > 0)
- --InsIdx;
- continue;
- }
- if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(DL, Ty);
- SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
- unsigned NumElts = ObjectVT.getVectorNumElements();
- assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
- "Vector was not scalarized");
- EVT EltVT = ObjectVT.getVectorElementType();
-
- // V1 load
- // f32 = load ...
- if (NumElts == 1) {
- // We only have one element, so just directly load it
- Value *SrcValue = Constant::getNullValue(PointerType::get(
- EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
- SDValue P = DAG.getLoad(
- EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
- DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
- if (P.getNode())
- P.getNode()->setIROrder(idx + 1);
- if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
- P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
- InVals.push_back(P);
- ++InsIdx;
- } else if (NumElts == 2) {
- // V2 load
- // f32,f32 = load ...
- EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
- Value *SrcValue = Constant::getNullValue(PointerType::get(
- VecVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
- SDValue P = DAG.getLoad(
- VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
- DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
+        // That's the last element of this load op.
+ if (VectorInfo[parti] & PVF_LAST) {
+ unsigned NumElts = parti - VecIdx + 1;
+ EVT EltVT = VTs[parti];
+ // i1 is loaded/stored as i8.
+ EVT LoadVT = EltVT;
+ if (EltVT == MVT::i1)
+ LoadVT = MVT::i8;
+ else if (EltVT == MVT::v2f16)
+ // getLoad needs a vector type, but it can't handle
+ // vectors which contain v2f16 elements. So we must load
+ // using i32 here and then bitcast back.
+ LoadVT = MVT::i32;
+
+ EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
+ SDValue VecAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
+ Value *srcValue = Constant::getNullValue(PointerType::get(
+ EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
+ SDValue P =
+ DAG.getLoad(VecVT, dl, Root, VecAddr,
+ MachinePointerInfo(srcValue), aggregateIsPacked,
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
-
- SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(0, dl));
- SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(1, dl));
-
- if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
- Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
- Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
- }
-
- InVals.push_back(Elt0);
- InVals.push_back(Elt1);
- InsIdx += 2;
- } else {
- // V4 loads
- // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
- // the vector will be expanded to a power of 2 elements, so we know we
- // can always round up to the next multiple of 4 when creating the
- // vector loads.
- // e.g. 4 elem => 1 ld.v4
- // 6 elem => 2 ld.v4
- // 8 elem => 2 ld.v4
- // 11 elem => 3 ld.v4
- unsigned VecSize = 4;
- if (EltVT.getSizeInBits() == 64) {
- VecSize = 2;
- }
- EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
- unsigned Ofst = 0;
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- Value *SrcValue = Constant::getNullValue(
- PointerType::get(VecVT.getTypeForEVT(F->getContext()),
- ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
- DAG.getConstant(Ofst, dl, PtrVT));
- SDValue P = DAG.getLoad(
- VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue),
- DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
- if (P.getNode())
- P.getNode()->setIROrder(idx + 1);
-
- for (unsigned j = 0; j < VecSize; ++j) {
- if (i + j >= NumElts)
- break;
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(j, dl));
- if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
- Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
- InVals.push_back(Elt);
+ for (unsigned j = 0; j < NumElts; ++j) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
+ DAG.getIntPtrConstant(j, dl));
+ // We've loaded i1 as an i8 and now must truncate it back to i1
+ if (EltVT == MVT::i1)
+ Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
+ // v2f16 was loaded as an i32. Now we must bitcast it back.
+ else if (EltVT == MVT::v2f16)
+ Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
+          // Extend the element if necessary (e.g. an i8 is loaded
+ // into an i16 register)
+ if (Ins[InsIdx].VT.isInteger() &&
+ Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
+ unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
}
- Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ InVals.push_back(Elt);
}
- InsIdx += NumElts;
- }
- if (NumElts > 0)
- --InsIdx;
- continue;
- }
- // A plain scalar.
- EVT ObjectVT = getValueType(DL, Ty);
- // If ABI, load from the param symbol
- SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
- Value *srcValue = Constant::getNullValue(PointerType::get(
- ObjectVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
- SDValue p;
- if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
- ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
- ISD::SEXTLOAD : ISD::ZEXTLOAD;
- p = DAG.getExtLoad(
- ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
- ObjectVT,
- DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
- } else {
- p = DAG.getLoad(
- Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue),
- DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ // Reset vector tracking state.
+ VecIdx = -1;
+ }
+ ++InsIdx;
}
- if (p.getNode())
- p.getNode()->setIROrder(idx + 1);
- InVals.push_back(p);
+ if (VTs.size() > 0)
+ --InsIdx;
continue;
}
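// [Editor's sketch -- not part of the patch] The load-grouping loop above is
// driven by the PVF_FIRST/PVF_LAST flags that VectorizePTXValueVTs attaches to
// each parameter piece: FIRST opens a run, LAST closes it and emits one vector
// load covering the whole run. A standalone C++ model of that grouping (the
// enum below is an illustrative stand-in, not the backend's definition):

#include <cstdio>
#include <vector>

enum Flag { Inner = 0, First = 1, Last = 2, Scalar = First | Last };

int main() {
  // Five parameter pieces: one 4-wide vectorizable run, then a lone scalar.
  std::vector<int> Info = {First, Inner, Inner, Last, Scalar};
  int Start = -1; // mirrors VecIdx in the loop above
  for (int i = 0, e = (int)Info.size(); i != e; ++i) {
    if (Info[i] & First)
      Start = i; // first element of the current group
    if (Info[i] & Last) {
      std::printf("group [%d,%d] -> one load of %d element(s)\n", Start, i,
                  i - Start + 1);
      Start = -1; // reset, ready for the next group
    }
  }
  return 0;
}
// [End editor's sketch]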
@@ -2412,164 +2510,77 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- Type *RetTy = F->getReturnType();
- const DataLayout &TD = DAG.getDataLayout();
+ Type *RetTy = MF.getFunction()->getReturnType();
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
- if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
- // If we have a vector type, the OutVals array will be the scalarized
- // components and we have combine them into 1 or more vector stores.
- unsigned NumElts = VTy->getNumElements();
- assert(NumElts == Outs.size() && "Bad scalarization of return value");
+ const DataLayout DL = DAG.getDataLayout();
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
+ assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
+
+ auto VectorInfo = VectorizePTXValueVTs(
+ VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1);
+
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
+ // 32-bits are sign extended or zero extended, depending on whether
+ // they are signed or unsigned types.
+ bool ExtendIntegerRetVal =
+ RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
+
+ SmallVector<SDValue, 6> StoreOperands;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ // New load/store. Record chain and offset operands.
+ if (VectorInfo[i] & PVF_FIRST) {
+ assert(StoreOperands.empty() && "Orphaned operand list.");
+ StoreOperands.push_back(Chain);
+ StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
+ }
- // const_cast can be removed in later LLVM versions
- EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
- bool NeedExtend = false;
- if (EltVT.getSizeInBits() < 16)
- NeedExtend = true;
-
- // V1 store
- if (NumElts == 1) {
- SDValue StoreVal = OutVals[0];
- // We only have one element, so just directly store it
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), Ops,
- EltVT, MachinePointerInfo());
- } else if (NumElts == 2) {
- // V2 store
- SDValue StoreVal0 = OutVals[0];
- SDValue StoreVal1 = OutVals[1];
-
- if (NeedExtend) {
- StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
- StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
- }
+ SDValue RetVal = OutVals[i];
+ if (ExtendIntegerRetVal) {
+ RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND,
+ dl, MVT::i32, RetVal);
+ } else if (RetVal.getValueSizeInBits() < 16) {
+ // Use 16-bit registers for small load-stores as it's the
+ // smallest general purpose register size supported by NVPTX.
+ RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
+ }
- SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
- StoreVal1 };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
- DAG.getVTList(MVT::Other), Ops,
- EltVT, MachinePointerInfo());
- } else {
- // V4 stores
- // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
- // vector will be expanded to a power of 2 elements, so we know we can
- // always round up to the next multiple of 4 when creating the vector
- // stores.
- // e.g. 4 elem => 1 st.v4
- // 6 elem => 2 st.v4
- // 8 elem => 2 st.v4
- // 11 elem => 3 st.v4
-
- unsigned VecSize = 4;
- if (OutVals[0].getValueSizeInBits() == 64)
- VecSize = 2;
-
- unsigned Offset = 0;
-
- EVT VecVT =
- EVT::getVectorVT(F->getContext(), EltVT, VecSize);
- unsigned PerStoreOffset =
- TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
-
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- // Get values
- SDValue StoreVal;
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
- unsigned Opc = NVPTXISD::StoreRetvalV2;
- EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
-
- StoreVal = OutVals[i];
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- Ops.push_back(StoreVal);
-
- if (i + 1 < NumElts) {
- StoreVal = OutVals[i + 1];
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(ExtendedVT);
- }
- Ops.push_back(StoreVal);
-
- if (VecSize == 4) {
- Opc = NVPTXISD::StoreRetvalV4;
- if (i + 2 < NumElts) {
- StoreVal = OutVals[i + 2];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(ExtendedVT);
- }
- Ops.push_back(StoreVal);
-
- if (i + 3 < NumElts) {
- StoreVal = OutVals[i + 3];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(ExtendedVT);
- }
- Ops.push_back(StoreVal);
- }
+ // Record the value to return.
+ StoreOperands.push_back(RetVal);
- // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
- Chain =
- DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
- EltVT, MachinePointerInfo());
- Offset += PerStoreOffset;
- }
- }
- } else {
- SmallVector<EVT, 16> ValVTs;
- SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
- assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
-
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- SDValue theVal = OutVals[i];
- EVT TheValType = theVal.getValueType();
- unsigned numElems = 1;
- if (TheValType.isVector())
- numElems = TheValType.getVectorNumElements();
- for (unsigned j = 0, je = numElems; j != je; ++j) {
- SDValue TmpVal = theVal;
- if (TheValType.isVector())
- TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- TheValType.getVectorElementType(), TmpVal,
- DAG.getIntPtrConstant(j, dl));
- EVT TheStoreType = ValVTs[i];
- if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
- // The following zero-extension is for integer types only, and
- // specifically not for aggregates.
- TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
- TheStoreType = MVT::i32;
- }
- else if (TmpVal.getValueSizeInBits() < 16)
- TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
-
- SDValue Ops[] = {
- Chain,
- DAG.getConstant(Offsets[i], dl, MVT::i32),
- TmpVal };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), Ops,
- TheStoreType,
- MachinePointerInfo());
+ // That's the last element of this store op.
+ if (VectorInfo[i] & PVF_LAST) {
+ NVPTXISD::NodeType Op;
+ unsigned NumElts = StoreOperands.size() - 2;
+ switch (NumElts) {
+ case 1:
+ Op = NVPTXISD::StoreRetval;
+ break;
+ case 2:
+ Op = NVPTXISD::StoreRetvalV2;
+ break;
+ case 4:
+ Op = NVPTXISD::StoreRetvalV4;
+ break;
+ default:
+ llvm_unreachable("Invalid vector info.");
}
+
+ // Adjust type of load/store op if we've extended the scalar
+ // return value.
+ EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
+ Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other),
+ StoreOperands, TheStoreType,
+ MachinePointerInfo(), 1);
+      // Clean up vector state.
+ StoreOperands.clear();
}
}
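// [Editor's sketch -- not part of the patch] The return-lowering loop above
// widens sub-32-bit integer return values to i32 before emitting StoreRetval,
// per the interoperability rule quoted in the comment, choosing sign- or
// zero-extension from the IR's sext flag. A scalar model of that widening step:

#include <cstdint>

static int32_t widenForStoreRetval(int16_t V, bool IsSExt) {
  return IsSExt ? static_cast<int32_t>(V)                         // sign-extend
                : static_cast<int32_t>(static_cast<uint16_t>(V)); // zero-extend
}

int main() {
  // -1 as an i16 return: sign-extension keeps -1; zero-extension yields 65535.
  return (widenForStoreRetval(-1, true) == -1 &&
          widenForStoreRetval(-1, false) == 65535) ? 0 : 1;
}
// [End editor's sketch]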
@@ -3863,27 +3874,35 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
CodeGenOpt::Level OptLevel) const {
- const Function *F = MF.getFunction();
- const TargetOptions &TO = MF.getTarget().Options;
-
// Always honor command-line argument
- if (FMAContractLevelOpt.getNumOccurrences() > 0) {
+ if (FMAContractLevelOpt.getNumOccurrences() > 0)
return FMAContractLevelOpt > 0;
- } else if (OptLevel == 0) {
- // Do not contract if we're not optimizing the code
+
+ // Do not contract if we're not optimizing the code.
+ if (OptLevel == 0)
return false;
- } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
- // Honor TargetOptions flags that explicitly say fusion is okay
+
+ // Honor TargetOptions flags that explicitly say fusion is okay.
+ if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
return true;
- } else if (F->hasFnAttribute("unsafe-fp-math")) {
- // Check for unsafe-fp-math=true coming from Clang
+
+ return allowUnsafeFPMath(MF);
+}
+
+bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
+ // Honor TargetOptions flags that explicitly say unsafe math is okay.
+ if (MF.getTarget().Options.UnsafeFPMath)
+ return true;
+
+ // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttribute("unsafe-fp-math")) {
Attribute Attr = F->getFnAttribute("unsafe-fp-math");
StringRef Val = Attr.getValueAsString();
if (Val == "true")
return true;
}
- // We did not have a clear indication that fusion is allowed, so assume not
return false;
}
@@ -4088,67 +4107,6 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
-static SDValue PerformSELECTCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // Currently this detects patterns for integer min and max and
- // lowers them to PTX-specific intrinsics that enable hardware
- // support.
-
- const SDValue Cond = N->getOperand(0);
- if (Cond.getOpcode() != ISD::SETCC) return SDValue();
-
- const SDValue LHS = Cond.getOperand(0);
- const SDValue RHS = Cond.getOperand(1);
- const SDValue True = N->getOperand(1);
- const SDValue False = N->getOperand(2);
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
- const EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
-
- const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
- SDValue Larger; // The larger of LHS and RHS when condition is true.
- switch (CC) {
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETLT:
- case ISD::SETLE:
- Larger = RHS;
- break;
-
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- Larger = LHS;
- break;
-
- default:
- return SDValue();
- }
- const bool IsMax = (Larger == True);
- const bool IsSigned = ISD::isSignedIntSetCC(CC);
-
- unsigned IntrinsicId;
- if (VT == MVT::i32) {
- if (IsSigned)
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
- else
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
- } else {
- assert(VT == MVT::i64);
- if (IsSigned)
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
- else
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
- }
-
- SDLoc DL(N);
- return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
-}
-
static SDValue PerformREMCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
CodeGenOpt::Level OptLevel) {
@@ -4344,6 +4302,27 @@ static SDValue PerformSHLCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformSETCCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT CCType = N->getValueType(0);
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+
+ if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16)
+ return SDValue();
+
+ SDLoc DL(N);
+ // setp.f16x2 returns two scalar predicates, which we need to
+ // convert back to v2i1. The returned result will be scalarized by
+ // the legalizer, but the comparison will remain a single vector
+ // instruction.
+ SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL,
+ DCI.DAG.getVTList(MVT::i1, MVT::i1),
+ {A, B, N->getOperand(2)});
+ return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
+ CCNode.getValue(1));
+}
+
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
@@ -4358,11 +4337,11 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformSHLCombine(N, DCI, OptLevel);
case ISD::AND:
return PerformANDCombine(N, DCI);
- case ISD::SELECT:
- return PerformSELECTCombine(N, DCI);
case ISD::UREM:
case ISD::SREM:
return PerformREMCombine(N, DCI, OptLevel);
+ case ISD::SETCC:
+ return PerformSETCCCombine(N, DCI);
}
return SDValue();
}
@@ -4386,12 +4365,15 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
case MVT::v2i16:
case MVT::v2i32:
case MVT::v2i64:
+ case MVT::v2f16:
case MVT::v2f32:
case MVT::v2f64:
case MVT::v4i8:
case MVT::v4i16:
case MVT::v4i32:
+ case MVT::v4f16:
case MVT::v4f32:
+ case MVT::v8f16: // <4 x f16x2>
// This is a "native" vector type
break;
}
@@ -4425,6 +4407,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
unsigned Opcode = 0;
SDVTList LdResVTs;
+ bool LoadF16x2 = false;
switch (NumElts) {
default:
@@ -4439,6 +4422,18 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LdResVTs = DAG.getVTList(ListVTs);
break;
}
+ case 8: {
+    // v8f16 is a special case. PTX doesn't have an ld.v8.f16
+    // instruction. Instead, we split the vector into v2f16 chunks and
+ // load them with ld.v4.b32.
+ assert(EltVT == MVT::f16 && "Unsupported v8 vector type.");
+ LoadF16x2 = true;
+ Opcode = NVPTXISD::LoadV4;
+ EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16,
+ MVT::Other};
+ LdResVTs = DAG.getVTList(ListVTs);
+ break;
+ }
}
// Copy regular operands
@@ -4452,13 +4447,26 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LD->getMemoryVT(),
LD->getMemOperand());
- SmallVector<SDValue, 4> ScalarRes;
-
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Res = NewLD.getValue(i);
- if (NeedTrunc)
- Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
- ScalarRes.push_back(Res);
+ SmallVector<SDValue, 8> ScalarRes;
+ if (LoadF16x2) {
+ // Split v2f16 subvectors back into individual elements.
+ NumElts /= 2;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue SubVector = NewLD.getValue(i);
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
+ DAG.getIntPtrConstant(1, DL));
+ ScalarRes.push_back(E0);
+ ScalarRes.push_back(E1);
+ }
+ } else {
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
}
SDValue LoadChain = NewLD.getValue(NumElts);
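// [Editor's sketch -- not part of the patch] With the v8f16 strategy above,
// the four v2f16 values produced by the 4-wide load are peeled back into eight
// scalars in order; element k of the original vector comes from lane (k % 2)
// of chunk (k / 2). A trivial model of that index mapping:

#include <cstdio>

int main() {
  for (int k = 0; k < 8; ++k)
    std::printf("v8f16 element %d -> chunk %d, lane %d\n", k, k / 2, k % 2);
  return 0;
}
// [End editor's sketch]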
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index e433aed7781b..9d7b70d80c11 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -56,6 +56,7 @@ enum NodeType : unsigned {
MUL_WIDE_SIGNED,
MUL_WIDE_UNSIGNED,
IMAD,
+ SETP_F16X2,
Dummy,
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -73,7 +74,7 @@ enum NodeType : unsigned {
StoreParamV2,
StoreParamV4,
StoreParamS32, // to sext and store a <32bit value, not used currently
- StoreParamU32, // to zext and store a <32bit value, not used currently
+ StoreParamU32, // to zext and store a <32bit value, not used currently
StoreRetval,
StoreRetvalV2,
StoreRetvalV4,
@@ -510,17 +511,48 @@ public:
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
+ // Get the degree of precision we want from 32-bit floating point division
+ // operations.
+ //
+ // 0 - Use ptx div.approx
+  // 1 - Use ptx div.full (approximate, but less so than div.approx)
+ // 2 - Use IEEE-compliant div instructions, if available.
+ int getDivF32Level() const;
+
+ // Get whether we should use a precise or approximate 32-bit floating point
+ // sqrt instruction.
+ bool usePrecSqrtF32() const;
+
+ // Get whether we should use instructions that flush floating-point denormals
+ // to sign-preserving zero.
+ bool useF32FTZ(const MachineFunction &MF) const;
+
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps, bool &UseOneConst,
+ bool Reciprocal) const override;
+
+ unsigned combineRepeatedFPDivisors() const override { return 2; }
+
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
+ bool allowUnsafeFPMath(MachineFunction &MF) const;
bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
+ // The default is to transform llvm.ctlz(x, false) (where false indicates that
+ // x == 0 is not undefined behavior) into a branch that checks whether x is 0
+ // and avoids calling ctlz in that case. We have a dedicated ctlz
+ // instruction, so we say that ctlz is cheap to speculate.
+ bool isCheapToSpeculateCtlz() const override { return true; }
+
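// [Editor's sketch -- not part of the patch] A scalar model of the ctlz
// semantics referenced above: llvm.ctlz(x, /*is_zero_undef=*/false) must be
// well-defined for x == 0 and return the full bit width, which is exactly what
// a dedicated count-leading-zeros instruction provides without a zero check.

#include <cstdint>

static unsigned countLeadingZeros32(uint32_t X) {
  unsigned N = 0;
  for (uint32_t Mask = 0x80000000u; Mask != 0 && (X & Mask) == 0; Mask >>= 1)
    ++N;
  return N; // 32 when X == 0, so no guarding branch is needed
}

int main() { return countLeadingZeros32(0) == 32 ? 0 : 1; }
// [End editor's sketch]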
private:
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index 8d00bbb5e9c2..f12ed81b6d9f 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -96,9 +96,7 @@ bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) {
// This is an OpenCL sampler, so it must be a samplerref
replaceWith(&I, ConstantInt::getTrue(I.getContext()));
return true;
- } else if (isImageWriteOnly(*TexHandle) ||
- isImageReadWrite(*TexHandle) ||
- isImageReadOnly(*TexHandle)) {
+ } else if (isImage(*TexHandle)) {
// This is an OpenCL image, so it cannot be a samplerref
replaceWith(&I, ConstantInt::getFalse(I.getContext()));
return true;
diff --git a/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp b/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
deleted file mode 100644
index f4940c937a2d..000000000000
--- a/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
+++ /dev/null
@@ -1,583 +0,0 @@
-//===-- NVPTXInferAddressSpace.cpp - ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// CUDA C/C++ includes memory space designation as variable type qualifers (such
-// as __global__ and __shared__). Knowing the space of a memory access allows
-// CUDA compilers to emit faster PTX loads and stores. For example, a load from
-// shared memory can be translated to `ld.shared` which is roughly 10% faster
-// than a generic `ld` on an NVIDIA Tesla K40c.
-//
-// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
-// compilers must infer the memory space of an address expression from
-// type-qualified variables.
-//
-// LLVM IR uses non-zero (so-called) specific address spaces to represent memory
-// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
-// places only type-qualified variables in specific address spaces, and then
-// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
-// (so-called the generic address space) for other instructions to use.
-//
-// For example, the Clang translates the following CUDA code
-// __shared__ float a[10];
-// float v = a[i];
-// to
-// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
-// %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
-// %v = load float, float* %1 ; emits ld.f32
-// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
-// redirected to %0 (the generic version of @a).
-//
-// The optimization implemented in this file propagates specific address spaces
-// from type-qualified variable declarations to its users. For example, it
-// optimizes the above IR to
-// %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %v = load float addrspace(3)* %1 ; emits ld.shared.f32
-// propagating the addrspace(3) from @a to %1. As the result, the NVPTX
-// codegen is able to emit ld.shared.f32 for %v.
-//
-// Address space inference works in two steps. First, it uses a data-flow
-// analysis to infer as many generic pointers as possible to point to only one
-// specific address space. In the above example, it can prove that %1 only
-// points to addrspace(3). This algorithm was published in
-// CUDA: Compiling and optimizing for a GPU platform
-// Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
-// ICCS 2012
-//
-// Then, address space inference replaces all refinable generic pointers with
-// equivalent specific pointers.
-//
-// The major challenge of implementing this optimization is handling PHINodes,
-// which may create loops in the data flow graph. This brings two complications.
-//
-// First, the data flow analysis in Step 1 needs to be circular. For example,
-// %generic.input = addrspacecast float addrspace(3)* %input to float*
-// loop:
-// %y = phi [ %generic.input, %y2 ]
-// %y2 = getelementptr %y, 1
-// %v = load %y2
-// br ..., label %loop, ...
-// proving %y specific requires proving both %generic.input and %y2 specific,
-// but proving %y2 specific circles back to %y. To address this complication,
-// the data flow analysis operates on a lattice:
-// uninitialized > specific address spaces > generic.
-// All address expressions (our implementation only considers phi, bitcast,
-// addrspacecast, and getelementptr) start with the uninitialized address space.
-// The monotone transfer function moves the address space of a pointer down a
-// lattice path from uninitialized to specific and then to generic. A join
-// operation of two different specific address spaces pushes the expression down
-// to the generic address space. The analysis completes once it reaches a fixed
-// point.
-//
-// Second, IR rewriting in Step 2 also needs to be circular. For example,
-// converting %y to addrspace(3) requires the compiler to know the converted
-// %y2, but converting %y2 needs the converted %y. To address this complication,
-// we break these cycles using "undef" placeholders. When converting an
-// instruction `I` to a new address space, if its operand `Op` is not converted
-// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
-// For instance, our algorithm first converts %y to
-// %y' = phi float addrspace(3)* [ %input, undef ]
-// Then, it converts %y2 to
-// %y2' = getelementptr %y', 1
-// Finally, it fixes the undef in %y' so that
-// %y' = phi float addrspace(3)* [ %input, %y2' ]
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "nvptx-infer-addrspace"
-
-#include "NVPTX.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-
-using namespace llvm;
-
-namespace {
-const unsigned ADDRESS_SPACE_UNINITIALIZED = (unsigned)-1;
-
-using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
-
-/// \brief NVPTXInferAddressSpaces
-class NVPTXInferAddressSpaces: public FunctionPass {
-public:
- static char ID;
-
- NVPTXInferAddressSpaces() : FunctionPass(ID) {}
-
- bool runOnFunction(Function &F) override;
-
-private:
- // Returns the new address space of V if updated; otherwise, returns None.
- Optional<unsigned>
- updateAddressSpace(const Value &V,
- const ValueToAddrSpaceMapTy &InferredAddrSpace);
-
- // Tries to infer the specific address space of each address expression in
- // Postorder.
- void inferAddressSpaces(const std::vector<Value *> &Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace);
-
- // Changes the generic address expressions in function F to point to specific
- // address spaces if InferredAddrSpace says so. Postorder is the postorder of
- // all generic address expressions in the use-def graph of function F.
- bool
- rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace,
- Function *F);
-};
-} // end anonymous namespace
-
-char NVPTXInferAddressSpaces::ID = 0;
-
-namespace llvm {
-void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
-}
-INITIALIZE_PASS(NVPTXInferAddressSpaces, "nvptx-infer-addrspace",
- "Infer address spaces",
- false, false)
-
-// Returns true if V is an address expression.
-// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
-// getelementptr operators.
-static bool isAddressExpression(const Value &V) {
- if (!isa<Operator>(V))
- return false;
-
- switch (cast<Operator>(V).getOpcode()) {
- case Instruction::PHI:
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- case Instruction::GetElementPtr:
- return true;
- default:
- return false;
- }
-}
-
-// Returns the pointer operands of V.
-//
-// Precondition: V is an address expression.
-static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
- assert(isAddressExpression(V));
- const Operator& Op = cast<Operator>(V);
- switch (Op.getOpcode()) {
- case Instruction::PHI: {
- auto IncomingValues = cast<PHINode>(Op).incoming_values();
- return SmallVector<Value *, 2>(IncomingValues.begin(),
- IncomingValues.end());
- }
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- case Instruction::GetElementPtr:
- return {Op.getOperand(0)};
- default:
- llvm_unreachable("Unexpected instruction type.");
- }
-}
-
-// If V is an unvisited generic address expression, appends V to PostorderStack
-// and marks it as visited.
-static void appendsGenericAddressExpressionToPostorderStack(
- Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) {
- assert(V->getType()->isPointerTy());
- if (isAddressExpression(*V) &&
- V->getType()->getPointerAddressSpace() ==
- AddressSpace::ADDRESS_SPACE_GENERIC) {
- if (Visited->insert(V).second)
- PostorderStack->push_back(std::make_pair(V, false));
- }
-}
-
-// Returns all generic address expressions in function F. The elements are
-// ordered in postorder.
-static std::vector<Value *> collectGenericAddressExpressions(Function &F) {
- // This function implements a non-recursive postorder traversal of a partial
- // use-def graph of function F.
- std::vector<std::pair<Value*, bool>> PostorderStack;
- // The set of visited expressions.
- DenseSet<Value*> Visited;
- // We only explore address expressions that are reachable from loads and
- // stores for now because we aim at generating faster loads and stores.
- for (Instruction &I : instructions(F)) {
- if (isa<LoadInst>(I)) {
- appendsGenericAddressExpressionToPostorderStack(
- I.getOperand(0), &PostorderStack, &Visited);
- } else if (isa<StoreInst>(I)) {
- appendsGenericAddressExpressionToPostorderStack(
- I.getOperand(1), &PostorderStack, &Visited);
- }
- }
-
- std::vector<Value *> Postorder; // The resultant postorder.
- while (!PostorderStack.empty()) {
- // If the operands of the expression on the top are already explored,
- // adds that expression to the resultant postorder.
- if (PostorderStack.back().second) {
- Postorder.push_back(PostorderStack.back().first);
- PostorderStack.pop_back();
- continue;
- }
- // Otherwise, adds its operands to the stack and explores them.
- PostorderStack.back().second = true;
- for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
- appendsGenericAddressExpressionToPostorderStack(
- PtrOperand, &PostorderStack, &Visited);
- }
- }
- return Postorder;
-}
-
-// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
-// of OperandUse.get() in the new address space. If the clone is not ready yet,
-// returns an undef in the new address space as a placeholder.
-static Value *operandWithNewAddressSpaceOrCreateUndef(
- const Use &OperandUse, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) {
- Value *Operand = OperandUse.get();
- if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
- return NewOperand;
-
- UndefUsesToFix->push_back(&OperandUse);
- return UndefValue::get(
- Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace));
-}
-
-// Returns a clone of `I` with its operands converted to those specified in
-// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
-// operand whose address space needs to be modified might not exist in
-// ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
-// adds that operand use to UndefUsesToFix so that caller can fix them later.
-//
-// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
-// from a pointer whose type already matches. Therefore, this function returns a
-// Value* instead of an Instruction*.
-static Value *cloneInstructionWithNewAddressSpace(
- Instruction *I, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) {
- Type *NewPtrType =
- I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
-
- if (I->getOpcode() == Instruction::AddrSpaceCast) {
- Value *Src = I->getOperand(0);
- // Because `I` is generic, the source address space must be specific.
- // Therefore, the inferred address space must be the source space, according
- // to our algorithm.
- assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
- if (Src->getType() != NewPtrType)
- return new BitCastInst(Src, NewPtrType);
- return Src;
- }
-
- // Computes the converted pointer operands.
- SmallVector<Value *, 4> NewPointerOperands;
- for (const Use &OperandUse : I->operands()) {
- if (!OperandUse.get()->getType()->isPointerTy())
- NewPointerOperands.push_back(nullptr);
- else
- NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
- OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
- }
-
- switch (I->getOpcode()) {
- case Instruction::BitCast:
- return new BitCastInst(NewPointerOperands[0], NewPtrType);
- case Instruction::PHI: {
- assert(I->getType()->isPointerTy());
- PHINode *PHI = cast<PHINode>(I);
- PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
- for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
- unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
- NewPHI->addIncoming(NewPointerOperands[OperandNo],
- PHI->getIncomingBlock(Index));
- }
- return NewPHI;
- }
- case Instruction::GetElementPtr: {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
- GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
- GEP->getSourceElementType(), NewPointerOperands[0],
- SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
- NewGEP->setIsInBounds(GEP->isInBounds());
- return NewGEP;
- }
- default:
- llvm_unreachable("Unexpected opcode");
- }
-}
-
-// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
-// constant expression `CE` with its operands replaced as specified in
-// ValueWithNewAddrSpace.
-static Value *cloneConstantExprWithNewAddressSpace(
- ConstantExpr *CE, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace) {
- Type *TargetType =
- CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
-
- if (CE->getOpcode() == Instruction::AddrSpaceCast) {
- // Because CE is generic, the source address space must be specific.
- // Therefore, the inferred address space must be the source space according
- // to our algorithm.
- assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
- NewAddrSpace);
- return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
- }
-
- // Computes the operands of the new constant expression.
- SmallVector<Constant *, 4> NewOperands;
- for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
- Constant *Operand = CE->getOperand(Index);
- // If the address space of `Operand` needs to be modified, the new operand
- // with the new address space should already be in ValueWithNewAddrSpace
- // because (1) the constant expressions we consider (i.e. addrspacecast,
- // bitcast, and getelementptr) do not incur cycles in the data flow graph
- // and (2) this function is called on constant expressions in postorder.
- if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
- NewOperands.push_back(cast<Constant>(NewOperand));
- } else {
- // Otherwise, reuses the old operand.
- NewOperands.push_back(Operand);
- }
- }
-
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- // Needs to specify the source type while constructing a getelementptr
- // constant expression.
- return CE->getWithOperands(
- NewOperands, TargetType, /*OnlyIfReduced=*/false,
- NewOperands[0]->getType()->getPointerElementType());
- }
-
- return CE->getWithOperands(NewOperands, TargetType);
-}
-
-// Returns a clone of the value `V`, with its operands replaced as specified in
-// ValueWithNewAddrSpace. This function is called on every generic address
-// expression whose address space needs to be modified, in postorder.
-//
-// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
-static Value *
-cloneValueWithNewAddressSpace(Value *V, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) {
- // All values in Postorder are generic address expressions.
- assert(isAddressExpression(*V) &&
- V->getType()->getPointerAddressSpace() ==
- AddressSpace::ADDRESS_SPACE_GENERIC);
-
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- Value *NewV = cloneInstructionWithNewAddressSpace(
- I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
- if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
- if (NewI->getParent() == nullptr) {
- NewI->insertBefore(I);
- NewI->takeName(I);
- }
- }
- return NewV;
- }
-
- return cloneConstantExprWithNewAddressSpace(
- cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
-}
-
-// Defines the join operation on the address space lattice (see the file header
-// comments).
-static unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) {
- if (AS1 == AddressSpace::ADDRESS_SPACE_GENERIC ||
- AS2 == AddressSpace::ADDRESS_SPACE_GENERIC)
- return AddressSpace::ADDRESS_SPACE_GENERIC;
-
- if (AS1 == ADDRESS_SPACE_UNINITIALIZED)
- return AS2;
- if (AS2 == ADDRESS_SPACE_UNINITIALIZED)
- return AS1;
-
- // The join of two different specific address spaces is generic.
- return AS1 == AS2 ? AS1 : (unsigned)AddressSpace::ADDRESS_SPACE_GENERIC;
-}
-
-bool NVPTXInferAddressSpaces::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- // Collects all generic address expressions in postorder.
- std::vector<Value *> Postorder = collectGenericAddressExpressions(F);
-
- // Runs a data-flow analysis to refine the address spaces of every expression
- // in Postorder.
- ValueToAddrSpaceMapTy InferredAddrSpace;
- inferAddressSpaces(Postorder, &InferredAddrSpace);
-
- // Changes the address spaces of the generic address expressions who are
- // inferred to point to a specific address space.
- return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
-}
-
-void NVPTXInferAddressSpaces::inferAddressSpaces(
- const std::vector<Value *> &Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) {
- SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
- // Initially, all expressions are in the uninitialized address space.
- for (Value *V : Postorder)
- (*InferredAddrSpace)[V] = ADDRESS_SPACE_UNINITIALIZED;
-
- while (!Worklist.empty()) {
- Value* V = Worklist.pop_back_val();
-
- // Tries to update the address space of the stack top according to the
- // address spaces of its operands.
- DEBUG(dbgs() << "Updating the address space of\n"
- << " " << *V << "\n");
- Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
- if (!NewAS.hasValue())
- continue;
- // If any updates are made, grabs its users to the worklist because
- // their address spaces can also be possibly updated.
- DEBUG(dbgs() << " to " << NewAS.getValue() << "\n");
- (*InferredAddrSpace)[V] = NewAS.getValue();
-
- for (Value *User : V->users()) {
- // Skip if User is already in the worklist.
- if (Worklist.count(User))
- continue;
-
- auto Pos = InferredAddrSpace->find(User);
- // Our algorithm only updates the address spaces of generic address
- // expressions, which are those in InferredAddrSpace.
- if (Pos == InferredAddrSpace->end())
- continue;
-
- // Function updateAddressSpace moves the address space down a lattice
- // path. Therefore, nothing to do if User is already inferred as
- // generic (the bottom element in the lattice).
- if (Pos->second == AddressSpace::ADDRESS_SPACE_GENERIC)
- continue;
-
- Worklist.insert(User);
- }
- }
-}
-
-Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
- const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) {
- assert(InferredAddrSpace.count(&V));
-
- // The new inferred address space equals the join of the address spaces
- // of all its pointer operands.
- unsigned NewAS = ADDRESS_SPACE_UNINITIALIZED;
- for (Value *PtrOperand : getPointerOperands(V)) {
- unsigned OperandAS;
- if (InferredAddrSpace.count(PtrOperand))
- OperandAS = InferredAddrSpace.lookup(PtrOperand);
- else
- OperandAS = PtrOperand->getType()->getPointerAddressSpace();
- NewAS = joinAddressSpaces(NewAS, OperandAS);
- // join(generic, *) = generic. So we can break if NewAS is already generic.
- if (NewAS == AddressSpace::ADDRESS_SPACE_GENERIC)
- break;
- }
-
- unsigned OldAS = InferredAddrSpace.lookup(&V);
- assert(OldAS != AddressSpace::ADDRESS_SPACE_GENERIC);
- if (OldAS == NewAS)
- return None;
- return NewAS;
-}
-
-bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
- const std::vector<Value *> &Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) {
- // For each address expression to be modified, creates a clone of it with its
- // pointer operands converted to the new address space. Since the pointer
- // operands are converted, the clone is naturally in the new address space by
- // construction.
- ValueToValueMapTy ValueWithNewAddrSpace;
- SmallVector<const Use *, 32> UndefUsesToFix;
- for (Value* V : Postorder) {
- unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
- if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
- ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
- V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
- }
- }
-
- if (ValueWithNewAddrSpace.empty())
- return false;
-
- // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
- for (const Use* UndefUse : UndefUsesToFix) {
- User *V = UndefUse->getUser();
- User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
- unsigned OperandNo = UndefUse->getOperandNo();
- assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
- NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
- }
-
- // Replaces the uses of the old address expressions with the new ones.
- for (Value *V : Postorder) {
- Value *NewV = ValueWithNewAddrSpace.lookup(V);
- if (NewV == nullptr)
- continue;
-
- SmallVector<Use *, 4> Uses;
- for (Use &U : V->uses())
- Uses.push_back(&U);
- DEBUG(dbgs() << "Replacing the uses of " << *V << "\n to\n " << *NewV
- << "\n");
- for (Use *U : Uses) {
- if (isa<LoadInst>(U->getUser()) ||
- (isa<StoreInst>(U->getUser()) && U->getOperandNo() == 1)) {
- // If V is used as the pointer operand of a load/store, sets the pointer
- // operand to NewV. This replacement does not change the element type,
- // so the resultant load/store is still valid.
- U->set(NewV);
- } else if (isa<Instruction>(U->getUser())) {
- // Otherwise, replaces the use with generic(NewV).
- // TODO: Some optimization opportunities are missed. For example, in
- // %0 = icmp eq float* %p, %q
- // if both p and q are inferred to be shared, we can rewrite %0 as
- // %0 = icmp eq float addrspace(3)* %new_p, %new_q
- // instead of currently
- // %generic_p = addrspacecast float addrspace(3)* %new_p to float*
- // %generic_q = addrspacecast float addrspace(3)* %new_q to float*
- // %0 = icmp eq float* %generic_p, %generic_q
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- BasicBlock::iterator InsertPos = std::next(I->getIterator());
- while (isa<PHINode>(InsertPos))
- ++InsertPos;
- U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
- } else {
- U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
- V->getType()));
- }
- }
- }
- if (V->use_empty())
- RecursivelyDeleteTriviallyDeadInstructions(V);
- }
-
- return true;
-}
-
-FunctionPass *llvm::createNVPTXInferAddressSpacesPass() {
- return new NVPTXInferAddressSpaces();
-}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 7f89742a3215..3026f0be242d 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -52,6 +52,11 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
: NVPTX::BITCONVERT_64_F2I);
+ } else if (DestRC == &NVPTX::Float16RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float16RegsRegClass ? NVPTX::FMOV16rr
+ : NVPTX::BITCONVERT_16_I2F);
+ } else if (DestRC == &NVPTX::Float16x2RegsRegClass) {
+ Op = NVPTX::IMOV32rr;
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
: NVPTX::BITCONVERT_32_I2F);
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 0fbb0448e4c4..2b847414b8a8 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -18,6 +18,10 @@ let hasSideEffects = 0 in {
def NOP : NVPTXInst<(outs), (ins), "", []>;
}
+let OperandType = "OPERAND_IMMEDIATE" in {
+ def f16imm : Operand<f16>;
+}
+
// List of vector specific properties
def isVecLD : VecInstTypeEnum<1>;
def isVecST : VecInstTypeEnum<2>;
@@ -98,6 +102,9 @@ def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;
def CmpMode : Operand<i32> {
let PrintMethod = "printCmpMode";
}
+def VecElement : Operand<i32> {
+ let PrintMethod = "printVecElement";
+}
//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
@@ -134,6 +141,7 @@ def doMulWide : Predicate<"doMulWide">;
def allowFMA : Predicate<"allowFMA()">;
def noFMA : Predicate<"!allowFMA()">;
+def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">;
def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
@@ -148,6 +156,7 @@ def true : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
+def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
@@ -239,11 +248,11 @@ multiclass F3<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
}
-// Template for instructions which take three fp64 or fp32 args. The
+// Template for instructions which take three FP args. The
// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").
//
// Also defines ftz (flush subnormal inputs and results to sign-preserving
-// zero) variants for fp32 functions.
+// zero) variants for fp32/fp16 functions.
//
// This multiclass should be used for nodes that can be folded to make fma ops.
// In this case, we use the ".rn" variant when FMA is disabled, as this behaves
@@ -286,6 +295,32 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[allowFMA]>;
+ def f16rr_ftz :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA, doF32FTZ]>;
+ def f16rr :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA]>;
+
+ def f16x2rr_ftz :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA, doF32FTZ]>;
+ def f16x2rr :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA]>;
+
// These have strange names so we don't perturb existing mir tests.
def _rnf64rr :
NVPTXInst<(outs Float64Regs:$dst),
@@ -323,6 +358,30 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[noFMA]>;
+ def _rnf16rr_ftz :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, noFMA, doF32FTZ]>;
+ def _rnf16rr :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, noFMA]>;
+ def _rnf16x2rr_ftz :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, noFMA, doF32FTZ]>;
+ def _rnf16x2rr :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, noFMA]>;
}
// Template for operations which take two f32 or f64 operands. Provides three
@@ -358,57 +417,57 @@ let hasSideEffects = 0 in {
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s8\t$dst, $src;"), []>;
+ FromName, ".s8 \t$dst, $src;"), []>;
def _u8 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u8\t$dst, $src;"), []>;
+ FromName, ".u8 \t$dst, $src;"), []>;
def _s16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s16\t$dst, $src;"), []>;
+ FromName, ".s16 \t$dst, $src;"), []>;
def _u16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u16\t$dst, $src;"), []>;
- def _f16 :
- NVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f16\t$dst, $src;"), []>;
+ FromName, ".u16 \t$dst, $src;"), []>;
def _s32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s32\t$dst, $src;"), []>;
+ FromName, ".s32 \t$dst, $src;"), []>;
def _u32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u32\t$dst, $src;"), []>;
+ FromName, ".u32 \t$dst, $src;"), []>;
def _s64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s64\t$dst, $src;"), []>;
+ FromName, ".s64 \t$dst, $src;"), []>;
def _u64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u64\t$dst, $src;"), []>;
+ FromName, ".u64 \t$dst, $src;"), []>;
+ def _f16 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Float16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".f16 \t$dst, $src;"), []>;
def _f32 :
NVPTXInst<(outs RC:$dst),
(ins Float32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f32\t$dst, $src;"), []>;
+ FromName, ".f32 \t$dst, $src;"), []>;
def _f64 :
NVPTXInst<(outs RC:$dst),
(ins Float64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f64\t$dst, $src;"), []>;
+ FromName, ".f64 \t$dst, $src;"), []>;
}
// Generate cvts from all types to all types.
@@ -416,11 +475,11 @@ let hasSideEffects = 0 in {
defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>;
defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
- defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
+ defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>;
defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
@@ -458,7 +517,7 @@ multiclass ADD_SUB_i1<SDNode OpNode> {
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;
-// int16, int32, and int64 signed addition. Since nvptx is 2's compliment, we
+// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
// also use these for unsigned arithmetic.
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;
@@ -485,6 +544,24 @@ defm UDIV : I3<"div.u", udiv>;
defm SREM : I3<"rem.s", srem>;
defm UREM : I3<"rem.u", urem>;
+// Integer absolute value. NumBits should be the bit width of RC minus one.
+// This idiom implements the algorithm at
+// http://graphics.stanford.edu/~seander/bithacks.html#IntegerAbs.
+multiclass ABS<RegisterClass RC, int NumBits, string SizeName> {
+ def : NVPTXInst<(outs RC:$dst), (ins RC:$a),
+ !strconcat("abs", SizeName, " \t$dst, $a;"),
+ [(set RC:$dst, (xor (add (sra RC:$a, (i32 NumBits)), RC:$a),
+ (sra RC:$a, (i32 NumBits))))]>;
+}
+defm ABS_16 : ABS<Int16Regs, 15, ".s16">;
+defm ABS_32 : ABS<Int32Regs, 31, ".s32">;
+defm ABS_64 : ABS<Int64Regs, 63, ".s64">;
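// [Editor's sketch -- not part of the patch] The ABS pattern above is the
// branch-free absolute-value idiom from the cited bithacks page; for 32 bits,
// mask = a >> 31 (arithmetic shift) and abs(a) = (a + mask) ^ mask. A C++
// model of the same arithmetic (not valid for INT_MIN, like std::abs):

#include <cstdint>

static int32_t absBranchFree(int32_t A) {
  int32_t Mask = A >> 31; // 0 if A >= 0, -1 (all ones) if A < 0
  return (A + Mask) ^ Mask;
}

int main() {
  // absBranchFree(-7): Mask = -1, (-7 + -1) ^ -1 = ~(-8) = 7.
  return (absBranchFree(-7) == 7 && absBranchFree(7) == 7) ? 0 : 1;
}
// [End editor's sketch]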
+
+// Integer min/max.
+defm SMAX : I3<"max.s", smax>;
+defm UMAX : I3<"max.u", umax>;
+defm SMIN : I3<"min.s", smin>;
+defm UMIN : I3<"min.u", umin>;
//
// Wide multiplication
@@ -748,6 +825,15 @@ def DoubleConst1 : PatLeaf<(fpimm), [{
N->getValueAPF().convertToDouble() == 1.0;
}]>;
+// Loads an FP16 constant into a register.
+//
+// ptxas has no way to represent an fp16 constant in hex, so we can't use
+// fp16 immediate values in .f16 instructions. Instead we have to load
+// the constant into a register using mov.b16.
+def LOAD_CONST_F16 :
+ NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a),
+ "mov.b16 \t$dst, $a;", []>;
+
defm FADD : F3_fma_component<"add", fadd>;
defm FSUB : F3_fma_component<"sub", fsub>;
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -908,18 +994,9 @@ def FDIV32ri_prec :
Requires<[reqPTX20]>;
//
-// F32 rsqrt
+// FMA
//
-def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
- "rsqrt.approx.f32 \t$dst, $b;", []>;
-
-// Convert 1.0f/sqrt(x) to rsqrt.approx.f32. (There is an rsqrt.approx.f64, but
-// it's emulated in software.)
-def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
- (RSQRTF32approx1r Float32Regs:$b)>,
- Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
-
multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> {
def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
@@ -942,6 +1019,17 @@ multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred>
Requires<[Pred]>;
}
+multiclass FMA_F16<string OpcStr, RegisterClass RC, Predicate Pred> {
+ def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
+ Requires<[useFP16Math, Pred]>;
+}
+
+defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", Float16Regs, doF32FTZ>;
+defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, true>;
+defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", Float16x2Regs, doF32FTZ>;
+defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, true>;
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, true>;
defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>;
@@ -949,10 +1037,12 @@ defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>;
// sin/cos
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
- [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
+ [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>,
+ Requires<[allowUnsafeFPMath]>;
def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"cos.approx.f32 \t$dst, $src;",
- [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
+ [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>,
+ Requires<[allowUnsafeFPMath]>;
// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)),
// i.e. "poor man's fmod()"
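A source-level sketch of that lowering for f32 (illustrative only; frem_lowered is a made-up name). Because it uses floor rather than truncation, the result can differ from C's fmodf when the operands have mixed signs:
__device__ float frem_lowered(float x, float y) {
  return x - floorf(x / y) * y;  // sub x, (mul (floor (div x, y)), y)
}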
@@ -1087,6 +1177,16 @@ defm SHL : SHIFT<"shl.b", shl>;
defm SRA : SHIFT<"shr.s", sra>;
defm SRL : SHIFT<"shr.u", srl>;
+// Bit-reverse
+def BREV32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ "brev.b32 \t$dst, $a;",
+ [(set Int32Regs:$dst, (bitreverse Int32Regs:$a))]>;
+def BREV64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a),
+ "brev.b64 \t$dst, $a;",
+ [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>;
+
//
// Rotate: Use ptx shf instruction if available.
//
@@ -1294,15 +1394,15 @@ let hasSideEffects = 0 in {
def rr :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
- "\t$dst, $a, $b;"), []>;
+ " \t$dst, $a, $b;"), []>;
def ri :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
- "\t$dst, $a, $b;"), []>;
+ " \t$dst, $a, $b;"), []>;
def ir :
NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
- "\t$dst, $a, $b;"), []>;
+ " \t$dst, $a, $b;"), []>;
}
}
@@ -1317,6 +1417,19 @@ defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
+def SETP_f16rr :
+ NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp),
+ "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;",
+ []>, Requires<[useFP16Math]>;
+
+def SETP_f16x2rr :
+ NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp),
+ "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;",
+ []>,
+ Requires<[useFP16Math]>;
+
// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
@@ -1326,13 +1439,13 @@ let hasSideEffects = 0 in {
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+ !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
def ri : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, ImmCls:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+ !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
def ir : NVPTXInst<(outs Int32Regs:$dst),
(ins ImmCls:$a, RC:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+ !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
}
}
@@ -1345,6 +1458,7 @@ defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
+defm SET_f16 : SET<"f16", Float16Regs, f16imm>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
@@ -1360,16 +1474,16 @@ let hasSideEffects = 0 in {
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
def ri : NVPTXInst<(outs RC:$dst),
(ins RC:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
def ir : NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
def ii : NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
}
multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
@@ -1377,22 +1491,22 @@ let hasSideEffects = 0 in {
def rr :
NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
def ri :
NVPTXInst<(outs RC:$dst),
(ins RC:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
def ir :
NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
def ii :
NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
}
}
@@ -1408,9 +1522,17 @@ defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
+defm SELP_f16 : SELP_PATTERN<"b16", Float16Regs, f16imm, fpimm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
+def SELP_f16x2rr :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Float16x2Regs:$dst,
+ (select Int1Regs:$p, Float16x2Regs:$a, Float16x2Regs:$b))]>;
+
//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
@@ -1472,6 +1594,9 @@ let IsSimpleMove=1, hasSideEffects=0 in {
def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
"mov.u64 \t$dst, $sss;", []>;
+ def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src),
+ // We have to use .b16 here as there's no mov.f16.
+ "mov.b16 \t$dst, $src;", []>;
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"mov.f32 \t$dst, $src;", []>;
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
@@ -1633,6 +1758,26 @@ def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
+ // f16 -> pred
+ def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ Requires<[useFP16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+
// f32 -> pred
def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
(SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
@@ -1658,6 +1803,26 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
(SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
+ // f16 -> i32
+ def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+
// f32 -> i32
def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
(SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
@@ -1825,40 +1990,39 @@ def RETURNNode :
let mayLoad = 1 in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat(!strconcat("ld.param", opstr),
- "\t$dst, [retval0+$b];"),
+ !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
[]>;
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
!strconcat("ld.param.v2", opstr,
- "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
+ " \t{{$dst, $dst2}}, [retval0+$b];"), []>;
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins i32imm:$b),
!strconcat("ld.param.v4", opstr,
- "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+ " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
[]>;
}
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat("mov", opstr, "\t$dst, retval$b;"),
+ !strconcat("mov", opstr, " \t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
let mayStore = 1 in {
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
- !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
+ !strconcat("st.param", opstr, " \t[param$a+$b], $val;"),
[]>;
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
i32imm:$a, i32imm:$b),
!strconcat("st.param.v2", opstr,
- "\t[param$a+$b], {{$val, $val2}};"),
+ " \t[param$a+$b], {{$val, $val2}};"),
[]>;
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
@@ -1866,18 +2030,18 @@ let mayStore = 1 in {
regclass:$val4, i32imm:$a,
i32imm:$b),
!strconcat("st.param.v4", opstr,
- "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
+ " \t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
[]>;
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
- !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
+ !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
[]>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
!strconcat("st.param.v2", opstr,
- "\t[func_retval0+$a], {{$val, $val2}};"),
+ " \t[func_retval0+$a], {{$val, $val2}};"),
[]>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
@@ -1885,7 +2049,7 @@ let mayStore = 1 in {
(ins regclass:$val, regclass:$val2, regclass:$val3,
regclass:$val4, i32imm:$a),
!strconcat("st.param.v4", opstr,
- "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+ " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
[]>;
}
@@ -1941,10 +2105,16 @@ def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
+def LoadParamMemF16 : LoadParamMemInst<Float16Regs, ".b16">;
+def LoadParamMemF16x2 : LoadParamMemInst<Float16x2Regs, ".b32">;
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
+def LoadParamMemV2F16 : LoadParamV2MemInst<Float16Regs, ".b16">;
+def LoadParamMemV2F16x2: LoadParamV2MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
+def LoadParamMemV4F16 : LoadParamV4MemInst<Float16Regs, ".b16">;
+def LoadParamMemV4F16x2: LoadParamV4MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
@@ -1961,10 +2131,16 @@ def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
def StoreParamV4I16 : StoreParamV4Inst<Int16Regs, ".b16">;
def StoreParamV4I8 : StoreParamV4Inst<Int16Regs, ".b8">;
+def StoreParamF16 : StoreParamInst<Float16Regs, ".b16">;
+def StoreParamF16x2 : StoreParamInst<Float16x2Regs, ".b32">;
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
+def StoreParamV2F16 : StoreParamV2Inst<Float16Regs, ".b16">;
+def StoreParamV2F16x2 : StoreParamV2Inst<Float16x2Regs, ".b32">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
+def StoreParamV4F16 : StoreParamV4Inst<Float16Regs, ".b16">;
+def StoreParamV4F16x2 : StoreParamV4Inst<Float16x2Regs, ".b32">;
def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
@@ -1981,9 +2157,15 @@ def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
+def StoreRetvalF16 : StoreRetvalInst<Float16Regs, ".b16">;
+def StoreRetvalF16x2 : StoreRetvalInst<Float16x2Regs, ".b32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
+def StoreRetvalV2F16 : StoreRetvalV2Inst<Float16Regs, ".b16">;
+def StoreRetvalV2F16x2: StoreRetvalV2Inst<Float16x2Regs, ".b32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
+def StoreRetvalV4F16 : StoreRetvalV4Inst<Float16Regs, ".b16">;
+def StoreRetvalV4F16x2: StoreRetvalV4Inst<Float16x2Regs, ".b32">;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
@@ -2057,17 +2239,18 @@ def DeclareScalarRegInst :
class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
- !strconcat("mov", asmstr, "\t$dst, $src;"),
+ !strconcat("mov", asmstr, " \t$dst, $src;"),
[(set regclass:$dst, (MoveParam regclass:$src))]>;
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
def MoveParamI16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
- "cvt.u16.u32\t$dst, $src;",
+ "cvt.u16.u32 \t$dst, $src;",
[(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
+def MoveParamF16 : MoveParamInst<Float16Regs, ".f16">;
class PseudoUseParamInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$src),
@@ -2128,6 +2311,8 @@ let mayLoad=1, hasSideEffects=0 in {
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
defm LD_i64 : LD<Int64Regs>;
+ defm LD_f16 : LD<Float16Regs>;
+ defm LD_f16x2 : LD<Float16x2Regs>;
defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
@@ -2176,6 +2361,8 @@ let mayStore=1, hasSideEffects=0 in {
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
defm ST_i64 : ST<Int64Regs>;
+ defm ST_f16 : ST<Float16Regs>;
+ defm ST_f16x2 : ST<Float16x2Regs>;
defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
@@ -2262,6 +2449,8 @@ let mayLoad=1, hasSideEffects=0 in {
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
defm LDV_i64 : LD_VEC<Int64Regs>;
+ defm LDV_f16 : LD_VEC<Float16Regs>;
+ defm LDV_f16x2 : LD_VEC<Float16x2Regs>;
defm LDV_f32 : LD_VEC<Float32Regs>;
defm LDV_f64 : LD_VEC<Float64Regs>;
}
@@ -2355,28 +2544,53 @@ let mayStore=1, hasSideEffects=0 in {
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
defm STV_i64 : ST_VEC<Int64Regs>;
+ defm STV_f16 : ST_VEC<Float16Regs>;
+ defm STV_f16x2 : ST_VEC<Float16x2Regs>;
defm STV_f32 : ST_VEC<Float32Regs>;
defm STV_f64 : ST_VEC<Float64Regs>;
}
-
//---- Conversion ----
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
NVPTXRegClass regclassOut> :
NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
- !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
+ !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
[(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
+def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
+def BITCONVERT_16_F2I : F_BITCONVERT<"16", Float16Regs, Int16Regs>;
def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
+def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", Int32Regs, Float16x2Regs>;
+def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", Float16x2Regs, Int32Regs>;
// NOTE: pred->fp conversions are currently sub-optimal due to an issue in
// TableGen where we cannot specify floating-point literals in isel patterns.
// Therefore, we use an integer selp to select either 1 or 0 and then cvt to
// floating-point (see the source-level sketch after the f16 patterns below).
+// sint -> f16
+def : Pat<(f16 (sint_to_fp Int1Regs:$a)),
+ (CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
+ (CVT_f16_s16 Int16Regs:$a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp Int32Regs:$a)),
+ (CVT_f16_s32 Int32Regs:$a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp Int64Regs:$a)),
+ (CVT_f16_s64 Int64Regs:$a, CvtRN)>;
+
+// uint -> f16
+def : Pat<(f16 (uint_to_fp Int1Regs:$a)),
+ (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
+ (CVT_f16_u16 Int16Regs:$a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp Int32Regs:$a)),
+ (CVT_f16_u32 Int32Regs:$a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp Int64Regs:$a)),
+ (CVT_f16_u64 Int64Regs:$a, CvtRN)>;
+
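A sketch of the i1 -> f32 path the NOTE above describes (illustrative only; the function name is made up):
__device__ float i1_to_f32(bool p) {
  unsigned t = p ? 1u : 0u;  // SELP_u32ii 1, 0, $p
  return (float)t;           // CVT_f32_u32 (or CVT_f32_s32 for the signed case)
}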
// sint -> f32
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
(CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
@@ -2418,6 +2632,38 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
(CVT_f64_u64 Int64Regs:$a, CvtRN)>;
+// f16 -> sint
+def : Pat<(i1 (fp_to_sint Float16Regs:$a)),
+ (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
+ (CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
+ (CVT_s16_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
+ (CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
+ (CVT_s32_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
+ (CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
+ (CVT_s64_f16 Float16Regs:$a, CvtRZI)>;
+
+// f16 -> uint
+def : Pat<(i1 (fp_to_uint Float16Regs:$a)),
+ (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
+ (CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
+ (CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
+ (CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
+ (CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
+ (CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
+ (CVT_u64_f16 Float16Regs:$a, CvtRZI)>;
+
// f32 -> sint
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
(SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
@@ -2562,6 +2808,9 @@ def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
(SELP_b64rr Int64Regs:$a, Int64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+def : Pat<(select Int32Regs:$pred, Float16Regs:$a, Float16Regs:$b),
+ (SELP_f16rr Float16Regs:$a, Float16Regs:$b,
+ (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
(SELP_f32rr Float32Regs:$a, Float32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
@@ -2575,77 +2824,150 @@ let hasSideEffects = 0 in {
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2,
Int16Regs:$s3, Int16Regs:$s4),
- "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>;
+ "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>;
def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2),
- "mov.b32\t$d, {{$s1, $s2}};", []>;
+ "mov.b32 \t$d, {{$s1, $s2}};", []>;
def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int32Regs:$s1, Int32Regs:$s2),
- "mov.b64\t$d, {{$s1, $s2}};", []>;
+ "mov.b64 \t$d, {{$s1, $s2}};", []>;
def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
(ins Float32Regs:$s1, Float32Regs:$s2),
- "mov.b64\t$d, {{$s1, $s2}};", []>;
+ "mov.b64 \t$d, {{$s1, $s2}};", []>;
// unpack a larger int register to a set of smaller int registers
def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
Int16Regs:$d3, Int16Regs:$d4),
(ins Int64Regs:$s),
- "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>;
+ "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>;
def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
(ins Int32Regs:$s),
- "mov.b32\t{{$d1, $d2}}, $s;", []>;
+ "mov.b32 \t{{$d1, $d2}}, $s;", []>;
def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
(ins Int64Regs:$s),
- "mov.b64\t{{$d1, $d2}}, $s;", []>;
+ "mov.b64 \t{{$d1, $d2}}, $s;", []>;
def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
(ins Float64Regs:$s),
- "mov.b64\t{{$d1, $d2}}, $s;", []>;
+ "mov.b64 \t{{$d1, $d2}}, $s;", []>;
+
+}
+
+let hasSideEffects = 0 in {
+  // Extract an element of an f16x2 register. PTX does not provide any way
+  // to access the elements of an f16x2 vector directly, so we need to
+  // extract them using a temporary register.
+ def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16x2Regs:$src),
+ "{{ .reg .b16 \t%tmp_hi;\n\t"
+ " mov.b32 \t{$dst, %tmp_hi}, $src; }}",
+ [(set Float16Regs:$dst,
+ (extractelt (v2f16 Float16x2Regs:$src), 0))]>;
+ def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16x2Regs:$src),
+ "{{ .reg .b16 \t%tmp_lo;\n\t"
+ " mov.b32 \t{%tmp_lo, $dst}, $src; }}",
+ [(set Float16Regs:$dst,
+ (extractelt (v2f16 Float16x2Regs:$src), 1))]>;
+
+ // Coalesce two f16 registers into f16x2
+ def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ "mov.b32 \t$dst, {{$a, $b}};",
+ [(set Float16x2Regs:$dst,
+ (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>;
+
+  // Directly initializing the underlying b32 register is one less SASS
+  // instruction than the vector-packing move.
+ def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src),
+ "mov.b32 \t$dst, $src;",
+ []>;
+
+ // Split f16x2 into two f16 registers.
+ def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
+ (ins Float16x2Regs:$src),
+ "mov.b32 \t{{$lo, $hi}}, $src;",
+ []>;
+ // Split an i32 into two f16
+ def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
+ (ins Int32Regs:$src),
+ "mov.b32 \t{{$lo, $hi}}, $src;",
+ []>;
}
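For reference, a CUDA-level sketch of what BuildF16x2 and F16x2toF16_{0,1} correspond to, using the cuda_fp16.h intrinsics (illustrative only; function names are made up, and the intrinsics assume sm_53 or later):
#include <cuda_fp16.h>
__device__ __half2 pack_f16x2(__half a, __half b) { return __halves2half2(a, b); }
__device__ __half  elem0(__half2 v) { return __low2half(v);  }  // F16x2toF16_0
__device__ __half  elem1(__half2 v) { return __high2half(v); }  // F16x2toF16_1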
// Count leading zeros
let hasSideEffects = 0 in {
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
- "clz.b32\t$d, $a;", []>;
+ "clz.b32 \t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
- "clz.b64\t$d, $a;", []>;
+ "clz.b64 \t$d, $a;", []>;
}
// 32-bit has a direct PTX instruction
def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
-// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
-// to 64-bit to match the LLVM semantics
+// The return type of the ctlz ISD node is the same as its input, but the PTX
+// clz instruction always returns a 32-bit value. For ctlz.i64, convert the
+// PTX value to 64 bits to match the ISD node's semantics, unless we know we're
+// truncating back down to 32 bits.
def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>;
-// For 16-bit, we zero-extend to 32-bit, then trunc the result back
-// to 16-bits (ctlz of a 16-bit value is guaranteed to require less
-// than 16 bits to store). We also need to subtract 16 because the
-// high-order 16 zeros were counted.
+// For 16-bit ctlz, we zero-extend to 32 bits, perform the count, then trunc
+// the result back to 16 bits if necessary. We also need to subtract 16
+// because the high-order 16 zeros were counted.
+//
+// TODO: NVPTX has a mov.b32 b32reg, {imm, b16reg} instruction, which we could
+// use to save one SASS instruction (on sm_35 anyway):
+//
+// mov.b32 $tmp, {0xffff, $a}
+// ctlz.b32 $result, $tmp
+//
+// That is, instead of zero-extending the input to 32 bits, we'd "one-extend"
+// and then ctlz that value. This way we don't have to subtract 16 from the
+// result. Unfortunately today we don't have a way to generate
+// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.
def : Pat<(ctlz Int16Regs:$a),
- (SUBi16ri (CVT_u16_u32 (CLZr32
- (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
- CvtNONE), 16)>;
+ (SUBi16ri (CVT_u16_u32
+ (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>;
+def : Pat<(i32 (zext (ctlz Int16Regs:$a))),
+ (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
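A sketch of what the ctlz patterns above compute, using CUDA's __clz/__clzll, which (like the PTX clz instruction) always return a 32-bit count (illustrative only; function names are made up):
__device__ unsigned ctlz64(unsigned long long a) {
  return (unsigned)__clzll((long long)a);            // CLZr64; widen or truncate as needed
}
__device__ unsigned short ctlz16(unsigned short a) {
  return (unsigned short)(__clz((unsigned)a) - 16);  // count in 32 bits, drop the 16 extra zeros
}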
// Population count
let hasSideEffects = 0 in {
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
- "popc.b32\t$d, $a;", []>;
+ "popc.b32 \t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
- "popc.b64\t$d, $a;", []>;
+ "popc.b64 \t$d, $a;", []>;
}
// 32-bit has a direct PTX instruction
def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;
-// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
-// to 64-bit to match the LLVM semantics
+// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit
+// to match the LLVM semantics. Just as with ctlz.i64, we provide a second
+// pattern that avoids the type conversion if we're truncating the result to
+// i32 anyway.
def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>;
-// For 16-bit, we zero-extend to 32-bit, then trunc the result back
-// to 16-bits (ctpop of a 16-bit value is guaranteed to require less
-// than 16 bits to store)
+// For 16-bit, we zero-extend to 32 bits, then trunc the result back to 16 bits.
+// If we know that we're storing into an i32, we can avoid the final trunc.
def : Pat<(ctpop Int16Regs:$a),
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
+def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
+ (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
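Likewise for ctpop, via CUDA's __popc (illustrative only; the function name is made up):
__device__ unsigned short popcount16(unsigned short a) {
  return (unsigned short)__popc((unsigned)a);  // POPCr32 on the zero-extended value
}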
+
+// fpround f32 -> f16
+def : Pat<(f16 (fpround Float32Regs:$a)),
+ (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f16 (fpround Float32Regs:$a)),
+ (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
+
+// fpround f64 -> f16
+def : Pat<(f16 (fpround Float64Regs:$a)),
+ (CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f16 (fpround Float64Regs:$a)),
+ (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
// fpround f64 -> f32
def : Pat<(f32 (fpround Float64Regs:$a)),
@@ -2653,6 +2975,18 @@ def : Pat<(f32 (fpround Float64Regs:$a)),
def : Pat<(f32 (fpround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN)>;
+// fpextend f16 -> f32
+def : Pat<(f32 (fpextend Float16Regs:$a)),
+ (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f32 (fpextend Float16Regs:$a)),
+ (CVT_f32_f16 Float16Regs:$a, CvtNONE)>;
+
+// fpextend f16 -> f64
+def : Pat<(f64 (fpextend Float16Regs:$a)),
+ (CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f64 (fpextend Float16Regs:$a)),
+ (CVT_f64_f16 Float16Regs:$a, CvtNONE)>;
+
// fpextend f32 -> f64
def : Pat<(f64 (fpextend Float32Regs:$a)),
(CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
@@ -2664,6 +2998,10 @@ def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
// fceil, ffloor, fround, ftrunc.
+def : Pat<(fceil Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(fceil Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
@@ -2671,6 +3009,10 @@ def : Pat<(fceil Float32Regs:$a),
def : Pat<(fceil Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
+def : Pat<(ffloor Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(ffloor Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
@@ -2678,6 +3020,10 @@ def : Pat<(ffloor Float32Regs:$a),
def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
+def : Pat<(fround Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f16 (fround Float16Regs:$a)),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
def : Pat<(fround Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float32Regs:$a)),
@@ -2685,6 +3031,10 @@ def : Pat<(f32 (fround Float32Regs:$a)),
def : Pat<(f64 (fround Float64Regs:$a)),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+def : Pat<(ftrunc Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(ftrunc Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
@@ -2696,6 +3046,10 @@ def : Pat<(ftrunc Float64Regs:$a),
// strictly correct, because it causes us to ignore the rounding mode. But it
// matches what CUDA's "libm" does.
+def : Pat<(fnearbyint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(fnearbyint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
@@ -2703,6 +3057,10 @@ def : Pat<(fnearbyint Float32Regs:$a),
def : Pat<(fnearbyint Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+def : Pat<(frint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(frint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index b0408f12f5b1..8d228a9eeb74 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -36,33 +36,39 @@ let isConvergent = 1 in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
"bar.sync \t0;",
[(int_nvvm_barrier0)]>;
+def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
+ "bar.sync \t$src1;",
+ [(int_nvvm_barrier_n Int32Regs:$src1)]>;
+def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
+ "bar.sync \t$src1, $src2;",
+ [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
- !strconcat(".reg .pred \t%p1; \n\t",
- !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
- !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
- !strconcat("}}", ""))))),
+ ".reg .pred \t%p1; \n\t",
+ "setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
+ "}}"),
[(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
- !strconcat(".reg .pred \t%p1; \n\t",
- !strconcat(".reg .pred \t%p2; \n\t",
- !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
- !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
- !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
- !strconcat("}}", ""))))))),
+ ".reg .pred \t%p1; \n\t",
+ ".reg .pred \t%p2; \n\t",
+ "setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ "bar.red.and.pred \t%p2, 0, %p1; \n\t",
+ "selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ "}}"),
[(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
- !strconcat(".reg .pred \t%p1; \n\t",
- !strconcat(".reg .pred \t%p2; \n\t",
- !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
- !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
- !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
- !strconcat("}}", ""))))))),
+ ".reg .pred \t%p1; \n\t",
+ ".reg .pred \t%p2; \n\t",
+ "setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ "bar.red.or.pred \t%p2, 0, %p1; \n\t",
+ "selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ "}}"),
[(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
-def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
+def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
[(int_nvvm_bar_sync imm:$i)]>;
// shfl.{up,down,bfly,idx}.b32
@@ -187,16 +193,6 @@ class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
// MISC
//
-def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_clz_i>;
-def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
- int_nvvm_clz_ll>;
-
-def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_popc_i>;
-def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
- int_nvvm_popc_ll>;
-
def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
@@ -204,26 +200,6 @@ def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
// Min Max
//
-def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_min_i>;
-def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_min_ui>;
-
-def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_min_ll>;
-def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_min_ull>;
-
-def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_max_i>;
-def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_max_ui>;
-
-def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_max_ll>;
-def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_max_ull>;
-
def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
@@ -239,6 +215,7 @@ def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
Float64Regs, Float64Regs, int_nvvm_fmax_d>;
+
//
// Multiplication
//
@@ -321,15 +298,6 @@ def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
//
-// Brev
-//
-
-def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_brev32>;
-def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
- int_nvvm_brev64>;
-
-//
// Sad
//
@@ -360,11 +328,6 @@ def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
// Abs
//
-def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_abs_i>;
-def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
- int_nvvm_abs_ll>;
-
def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
@@ -703,16 +666,18 @@ def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
-def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b32 %temp; \n\t",
- !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
- "}}"))),
- Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
-def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b32 %temp; \n\t",
- !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
- "}}"))),
- Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
+def INT_NVVM_D2I_LO : F_MATH_1<
+ !strconcat("{{\n\t",
+ ".reg .b32 %temp; \n\t",
+ "mov.b64 \t{$dst, %temp}, $src0;\n\t",
+ "}}"),
+ Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
+def INT_NVVM_D2I_HI : F_MATH_1<
+ !strconcat("{{\n\t",
+ ".reg .b32 %temp; \n\t",
+ "mov.b64 \t{%temp, $dst}, $src0;\n\t",
+ "}}"),
+ Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
(CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
@@ -803,49 +768,10 @@ def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
(CVT_f64_u64 Int64Regs:$a, CvtRP)>;
-// FIXME: Ideally, we could use these patterns instead of the scope-creating
-// patterns, but ptxas does not like these since .s16 is not compatible with
-// .f16. The solution is to use .bXX for all integer register types, but we
-// are not there yet.
-//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
-// (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
-//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
-// (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
-//
-//def : Pat<(int_nvvm_h2f Int16Regs:$a),
-// (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
-
-def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b16 %temp;\n\t",
- !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
- !strconcat("mov.b16 \t$dst, %temp;\n",
- "}}")))),
- Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
-def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b16 %temp;\n\t",
- !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
- !strconcat("mov.b16 \t$dst, %temp;\n",
- "}}")))),
- Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
-
-def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b16 %temp;\n\t",
- !strconcat("mov.b16 \t%temp, $src0;\n\t",
- !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
- "}}")))),
- Float32Regs, Int16Regs, int_nvvm_h2f>;
-
-def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
- (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
-def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
-
-def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
- (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
-def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
- (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
+def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
+ (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
+def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
+ (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
//
// Bitcast
@@ -882,20 +808,12 @@ multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, SDNode IMM, Predicate Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
Requires<[Pred]>;
def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
Requires<[Pred]>;
}
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
@@ -911,21 +829,13 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, Predicate Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat("{{ \n\t",
- !strconcat(".reg \t.s",
- !strconcat(TypeStr,
- !strconcat(" temp; \n\t",
- !strconcat("neg.s",
- !strconcat(TypeStr,
- !strconcat(" \ttemp, $b; \n\t",
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(".u",
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], temp; \n\t",
- !strconcat("}}", "")))))))))))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ !strconcat(
+ "{{ \n\t",
+ ".reg \t.s", TypeStr, " temp; \n\t",
+ "neg.s", TypeStr, " \ttemp, $b; \n\t",
+ "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
+ "}}"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
Requires<[Pred]>;
}
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
@@ -943,40 +853,26 @@ multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
Operand IMMType, Predicate Pred> {
def reg : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, regclass:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst,
- (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
- Requires<[Pred]>;
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
+ Requires<[Pred]>;
+
def imm1 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, regclass:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
Requires<[Pred]>;
+
def imm2 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, IMMType:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
Requires<[Pred]>;
+
def imm3 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, IMMType:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
Requires<[Pred]>;
}
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
@@ -1607,6 +1503,8 @@ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
+defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
+defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
@@ -1657,6 +1555,10 @@ defm INT_PTX_LDU_G_v2i16_ELE
: VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
: VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDU_G_v2f16_ELE
+ : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
+defm INT_PTX_LDU_G_v2f16x2_ELE
+ : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
: VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
@@ -1671,6 +1573,12 @@ defm INT_PTX_LDU_G_v4i16_ELE
defm INT_PTX_LDU_G_v4i32_ELE
: VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Int32Regs>;
+defm INT_PTX_LDU_G_v4f16_ELE
+ : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Float16Regs>;
+defm INT_PTX_LDU_G_v4f16x2_ELE
+ : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
@@ -1710,6 +1618,10 @@ defm INT_PTX_LDG_GLOBAL_i32
: LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
: LDG_G<"u64 \t$result, [$src];", Int64Regs>;
+defm INT_PTX_LDG_GLOBAL_f16
+ : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
+defm INT_PTX_LDG_GLOBAL_f16x2
+ : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32
: LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
@@ -1765,6 +1677,10 @@ defm INT_PTX_LDG_G_v2i16_ELE
: VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
: VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f16_ELE
+ : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
+defm INT_PTX_LDG_G_v2f16x2_ELE
+ : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
: VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
@@ -1777,17 +1693,21 @@ defm INT_PTX_LDG_G_v4i16_ELE
: VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
: VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f16_ELE
+ : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
+defm INT_PTX_LDG_G_v4f16x2_ELE
+ : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
+ !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
[(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
Requires<[hasGenericLdSt]>;
def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
+ !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
Requires<[hasGenericLdSt]>;
@@ -1821,11 +1741,11 @@ multiclass NG_TO_G<string Str, Intrinsic Intrin> {
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
+ !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
[(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
Requires<[hasGenericLdSt]>;
def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
+ !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
Requires<[hasGenericLdSt]>;
def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
@@ -1983,7 +1903,7 @@ def ISSPACEP_SHARED_64
// Special register reads
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
(ins SpecialRegs:$r),
- "mov.b32\t$d, $r;", []>;
+ "mov.b32 \t$d, $r;", []>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
@@ -2046,20 +1966,18 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
Requires<[noHWROT32]> ;
let hasSideEffects = 0 in {
- def GET_LO_INT64
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
- !strconcat("{{\n\t",
- !strconcat(".reg .b32 %dummy;\n\t",
- !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
- !strconcat("}}", "")))),
+ def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ ".reg .b32 %dummy;\n\t",
+ "mov.b64 \t{$dst,%dummy}, $src;\n\t",
+ "}}"),
[]> ;
- def GET_HI_INT64
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
- !strconcat("{{\n\t",
- !strconcat(".reg .b32 %dummy;\n\t",
- !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
- !strconcat("}}", "")))),
+ def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ ".reg .b32 %dummy;\n\t",
+ "mov.b64 \t{%dummy,$dst}, $src;\n\t",
+ "}}"),
[]> ;
}
@@ -2164,19 +2082,19 @@ def TEX_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_F32_F32_LEVEL
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], $lod;",
[]>;
def TEX_1D_F32_F32_GRAD
@@ -2184,27 +2102,27 @@ def TEX_1D_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], $lod;",
[]>;
def TEX_1D_S32_F32_GRAD
@@ -2212,27 +2130,27 @@ def TEX_1D_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_U32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], $lod;",
[]>;
def TEX_1D_U32_F32_GRAD
@@ -2240,7 +2158,7 @@ def TEX_1D_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -2248,14 +2166,14 @@ def TEX_1D_ARRAY_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_F32_F32_LEVEL
@@ -2263,7 +2181,7 @@ def TEX_1D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], $lod;",
[]>;
def TEX_1D_ARRAY_F32_F32_GRAD
@@ -2271,21 +2189,21 @@ def TEX_1D_ARRAY_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_ARRAY_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_S32_F32_LEVEL
@@ -2293,7 +2211,7 @@ def TEX_1D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], $lod;",
[]>;
def TEX_1D_ARRAY_S32_F32_GRAD
@@ -2301,21 +2219,21 @@ def TEX_1D_ARRAY_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_ARRAY_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_U32_F32_LEVEL
@@ -2323,7 +2241,7 @@ def TEX_1D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], $lod;",
[]>;
def TEX_1D_ARRAY_U32_F32_GRAD
@@ -2331,7 +2249,7 @@ def TEX_1D_ARRAY_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -2339,14 +2257,14 @@ def TEX_2D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_F32_F32_LEVEL
@@ -2354,7 +2272,7 @@ def TEX_2D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], $lod;",
[]>;
def TEX_2D_F32_F32_GRAD
@@ -2363,7 +2281,7 @@ def TEX_2D_F32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2371,14 +2289,14 @@ def TEX_2D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_S32_F32_LEVEL
@@ -2386,7 +2304,7 @@ def TEX_2D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], $lod;",
[]>;
def TEX_2D_S32_F32_GRAD
@@ -2395,7 +2313,7 @@ def TEX_2D_S32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2403,14 +2321,14 @@ def TEX_2D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_U32_F32_LEVEL
@@ -2418,7 +2336,7 @@ def TEX_2D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], $lod;",
[]>;
def TEX_2D_U32_F32_GRAD
@@ -2427,7 +2345,7 @@ def TEX_2D_U32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2437,7 +2355,7 @@ def TEX_2D_ARRAY_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_F32_F32
@@ -2445,7 +2363,7 @@ def TEX_2D_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_F32_F32_LEVEL
@@ -2453,7 +2371,7 @@ def TEX_2D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_2D_ARRAY_F32_F32_GRAD
@@ -2462,7 +2380,7 @@ def TEX_2D_ARRAY_F32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2471,7 +2389,7 @@ def TEX_2D_ARRAY_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_S32_F32
@@ -2479,7 +2397,7 @@ def TEX_2D_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_S32_F32_LEVEL
@@ -2487,7 +2405,7 @@ def TEX_2D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_2D_ARRAY_S32_F32_GRAD
@@ -2497,7 +2415,7 @@ def TEX_2D_ARRAY_S32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2506,7 +2424,7 @@ def TEX_2D_ARRAY_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_U32_F32
@@ -2514,7 +2432,7 @@ def TEX_2D_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_U32_F32_LEVEL
@@ -2522,7 +2440,7 @@ def TEX_2D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_2D_ARRAY_U32_F32_GRAD
@@ -2532,7 +2450,7 @@ def TEX_2D_ARRAY_U32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2542,7 +2460,7 @@ def TEX_3D_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_F32_F32
@@ -2550,7 +2468,7 @@ def TEX_3D_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_F32_F32_LEVEL
@@ -2558,7 +2476,7 @@ def TEX_3D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_3D_F32_F32_GRAD
@@ -2569,7 +2487,7 @@ def TEX_3D_F32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -2579,7 +2497,7 @@ def TEX_3D_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_S32_F32
@@ -2587,7 +2505,7 @@ def TEX_3D_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_S32_F32_LEVEL
@@ -2595,7 +2513,7 @@ def TEX_3D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_3D_S32_F32_GRAD
@@ -2606,7 +2524,7 @@ def TEX_3D_S32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -2616,7 +2534,7 @@ def TEX_3D_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_U32_F32
@@ -2624,7 +2542,7 @@ def TEX_3D_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_U32_F32_LEVEL
@@ -2632,7 +2550,7 @@ def TEX_3D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_3D_U32_F32_GRAD
@@ -2643,7 +2561,7 @@ def TEX_3D_U32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -2654,7 +2572,7 @@ def TEX_CUBE_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_CUBE_F32_F32_LEVEL
@@ -2663,7 +2581,7 @@ def TEX_CUBE_F32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_CUBE_S32_F32
@@ -2671,7 +2589,7 @@ def TEX_CUBE_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_CUBE_S32_F32_LEVEL
@@ -2680,7 +2598,7 @@ def TEX_CUBE_S32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_CUBE_U32_F32
@@ -2688,7 +2606,7 @@ def TEX_CUBE_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_CUBE_U32_F32_LEVEL
@@ -2697,7 +2615,7 @@ def TEX_CUBE_U32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
@@ -2706,7 +2624,7 @@ def TEX_CUBE_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_CUBE_ARRAY_F32_F32_LEVEL
@@ -2715,7 +2633,7 @@ def TEX_CUBE_ARRAY_F32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_CUBE_ARRAY_S32_F32
@@ -2723,7 +2641,7 @@ def TEX_CUBE_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_CUBE_ARRAY_S32_F32_LEVEL
@@ -2732,7 +2650,7 @@ def TEX_CUBE_ARRAY_S32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_CUBE_ARRAY_U32_F32
@@ -2740,7 +2658,7 @@ def TEX_CUBE_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_CUBE_ARRAY_U32_F32_LEVEL
@@ -2749,7 +2667,7 @@ def TEX_CUBE_ARRAY_U32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
@@ -2757,84 +2675,84 @@ def TLD4_R_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_G_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_B_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_A_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_R_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_G_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_B_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_A_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_R_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_G_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_B_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_A_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
}
@@ -2847,19 +2765,19 @@ def TEX_UNIFIED_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_F32_F32_LEVEL
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_F32_F32_GRAD
@@ -2867,27 +2785,27 @@ def TEX_UNIFIED_1D_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_S32_F32_GRAD
@@ -2895,27 +2813,27 @@ def TEX_UNIFIED_1D_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_U32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_U32_F32_GRAD
@@ -2923,7 +2841,7 @@ def TEX_UNIFIED_1D_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -2931,14 +2849,14 @@ def TEX_UNIFIED_1D_ARRAY_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
@@ -2946,7 +2864,7 @@ def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
@@ -2954,21 +2872,21 @@ def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
@@ -2976,7 +2894,7 @@ def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
@@ -2984,21 +2902,21 @@ def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
@@ -3006,7 +2924,7 @@ def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
@@ -3014,7 +2932,7 @@ def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -3022,14 +2940,14 @@ def TEX_UNIFIED_2D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_F32_F32_LEVEL
@@ -3037,7 +2955,7 @@ def TEX_UNIFIED_2D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_F32_F32_GRAD
@@ -3046,7 +2964,7 @@ def TEX_UNIFIED_2D_F32_F32_GRAD
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3054,14 +2972,14 @@ def TEX_UNIFIED_2D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_S32_F32_LEVEL
@@ -3069,7 +2987,7 @@ def TEX_UNIFIED_2D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_S32_F32_GRAD
@@ -3078,7 +2996,7 @@ def TEX_UNIFIED_2D_S32_F32_GRAD
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3086,14 +3004,14 @@ def TEX_UNIFIED_2D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_U32_F32_LEVEL
@@ -3101,7 +3019,7 @@ def TEX_UNIFIED_2D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_U32_F32_GRAD
@@ -3110,7 +3028,7 @@ def TEX_UNIFIED_2D_U32_F32_GRAD
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3120,7 +3038,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_F32_F32
@@ -3128,7 +3046,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
@@ -3136,7 +3054,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
@@ -3145,7 +3063,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3154,7 +3072,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_S32_F32
@@ -3162,7 +3080,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
@@ -3170,7 +3088,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
@@ -3180,7 +3098,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3189,7 +3107,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_U32_F32
@@ -3197,7 +3115,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
@@ -3205,7 +3123,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
@@ -3215,7 +3133,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3225,7 +3143,7 @@ def TEX_UNIFIED_3D_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_F32_F32
@@ -3233,7 +3151,7 @@ def TEX_UNIFIED_3D_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_F32_F32_LEVEL
@@ -3241,7 +3159,7 @@ def TEX_UNIFIED_3D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_3D_F32_F32_GRAD
@@ -3252,7 +3170,7 @@ def TEX_UNIFIED_3D_F32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -3262,7 +3180,7 @@ def TEX_UNIFIED_3D_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_S32_F32
@@ -3270,7 +3188,7 @@ def TEX_UNIFIED_3D_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_S32_F32_LEVEL
@@ -3278,7 +3196,7 @@ def TEX_UNIFIED_3D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_3D_S32_F32_GRAD
@@ -3289,7 +3207,7 @@ def TEX_UNIFIED_3D_S32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -3299,7 +3217,7 @@ def TEX_UNIFIED_3D_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_U32_F32
@@ -3307,7 +3225,7 @@ def TEX_UNIFIED_3D_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_U32_F32_LEVEL
@@ -3315,7 +3233,7 @@ def TEX_UNIFIED_3D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_3D_U32_F32_GRAD
@@ -3326,7 +3244,7 @@ def TEX_UNIFIED_3D_U32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -3337,7 +3255,7 @@ def TEX_UNIFIED_CUBE_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_F32_F32_LEVEL
@@ -3346,7 +3264,7 @@ def TEX_UNIFIED_CUBE_F32_F32_LEVEL
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_S32_F32
@@ -3354,7 +3272,7 @@ def TEX_UNIFIED_CUBE_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_S32_F32_LEVEL
@@ -3363,7 +3281,7 @@ def TEX_UNIFIED_CUBE_S32_F32_LEVEL
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_U32_F32
@@ -3371,7 +3289,7 @@ def TEX_UNIFIED_CUBE_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_U32_F32_LEVEL
@@ -3380,7 +3298,7 @@ def TEX_UNIFIED_CUBE_U32_F32_LEVEL
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
@@ -3389,7 +3307,7 @@ def TEX_UNIFIED_CUBE_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
@@ -3398,7 +3316,7 @@ def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32
@@ -3406,7 +3324,7 @@ def TEX_UNIFIED_CUBE_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
@@ -3415,7 +3333,7 @@ def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32
@@ -3423,7 +3341,7 @@ def TEX_UNIFIED_CUBE_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
@@ -3432,7 +3350,7 @@ def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
@@ -3440,84 +3358,84 @@ def TLD4_UNIFIED_R_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_G_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_B_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_A_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_R_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_G_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_B_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_A_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_R_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_G_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_B_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_A_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
}
@@ -7172,12 +7090,12 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
: NVPTXInst<(outs Int64Regs:$d), (ins),
- !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
+ !strconcat("mov.u64 \t$d, %", regname, ";"),
[(set Int64Regs:$d, (intop))]>;
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
: NVPTXInst<(outs Int32Regs:$d), (ins),
- !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
+ !strconcat("mov.u32 \t$d, %", regname, ";"),
[(set Int32Regs:$d, (intop))]>;
// TODO Add read vector-version of special registers
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index b925b632ee4a..3be291b48b8f 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#define DEBUG_TYPE "nvptx"
@@ -54,188 +55,6 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
char NVPTXLowerAggrCopies::ID = 0;
-// Lower memcpy to loop.
-void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr,
- Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
- bool DstIsVolatile, LLVMContext &Context,
- Function &F) {
- Type *TypeOfCopyLen = CopyLen->getType();
-
- BasicBlock *OrigBB = ConvertedInst->getParent();
- BasicBlock *NewBB =
- ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
- BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
-
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
- IRBuilder<> Builder(OrigBB->getTerminator());
-
- // SrcAddr and DstAddr are expected to be pointer types,
- // so no check is made here.
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
- // Cast pointers to (char *)
- SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
- DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
-
- // load from SrcAddr+LoopIndex
- // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
- // word-sized loads and stores.
- Value *Element =
- LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
- LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
- SrcIsVolatile);
- // store at DstAddr+LoopIndex
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
- DstAddr, LoopIndex),
- DstIsVolatile);
-
- // The value for LoopIndex coming from backedge is (LoopIndex + 1)
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
-}
-
-// Lower memmove to IR. memmove is required to correctly copy overlapping memory
-// regions; therefore, it has to check the relative positions of the source and
-// destination pointers and choose the copy direction accordingly.
-//
-// The code below is an IR rendition of this C function:
-//
-// void* memmove(void* dst, const void* src, size_t n) {
-// unsigned char* d = dst;
-// const unsigned char* s = src;
-// if (s < d) {
-// // copy backwards
-// while (n--) {
-// d[n] = s[n];
-// }
-// } else {
-// // copy forward
-// for (size_t i = 0; i < n; ++i) {
-// d[i] = s[i];
-// }
-// }
-// return dst;
-// }
-void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr,
- Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
- bool DstIsVolatile, LLVMContext &Context,
- Function &F) {
- Type *TypeOfCopyLen = CopyLen->getType();
- BasicBlock *OrigBB = ConvertedInst->getParent();
-
- // Create a comparison of src and dst, based on which we jump to either
- // the forward-copy part of the function (if src >= dst) or the backwards-copy
- // part (if src < dst).
- // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
- // structure. Its block terminators (unconditional branches) are replaced by
- // the appropriate conditional branches when the loop is built.
- ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT,
- SrcAddr, DstAddr, "compare_src_dst");
- TerminatorInst *ThenTerm, *ElseTerm;
- SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm,
- &ElseTerm);
-
- // Each part of the function consists of two blocks:
- // copy_backwards: used to skip the loop when n == 0
- // copy_backwards_loop: the actual backwards loop BB
- // copy_forward: used to skip the loop when n == 0
- // copy_forward_loop: the actual forward loop BB
- BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
- CopyBackwardsBB->setName("copy_backwards");
- BasicBlock *CopyForwardBB = ElseTerm->getParent();
- CopyForwardBB->setName("copy_forward");
- BasicBlock *ExitBB = ConvertedInst->getParent();
- ExitBB->setName("memmove_done");
-
- // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
- // between both backwards and forward copy clauses.
- ICmpInst *CompareN =
- new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
- ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
-
- // Copying backwards.
- BasicBlock *LoopBB =
- BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB);
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- Value *IndexPtr = LoopBuilder.CreateSub(
- LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
- Value *Element = LoopBuilder.CreateLoad(
- LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
- LoopBuilder.CreateCondBr(
- LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
- ExitBB, LoopBB);
- LoopPhi->addIncoming(IndexPtr, LoopBB);
- LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
- BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
- ThenTerm->eraseFromParent();
-
- // Copying forward.
- BasicBlock *FwdLoopBB =
- BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB);
- IRBuilder<> FwdLoopBuilder(FwdLoopBB);
- PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
- Value *FwdElement = FwdLoopBuilder.CreateLoad(
- FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
- FwdLoopBuilder.CreateStore(
- FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
- Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
- FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
- FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
- ExitBB, FwdLoopBB);
- FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
- FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
-
- BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
- ElseTerm->eraseFromParent();
-}
-
-// Lower memset to loop.
-void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr,
- Value *CopyLen, Value *SetValue, LLVMContext &Context,
- Function &F) {
- BasicBlock *OrigBB = ConvertedInst->getParent();
- BasicBlock *NewBB =
- ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
- BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
-
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
- IRBuilder<> Builder(OrigBB->getTerminator());
-
- // Cast pointer to the type of value getting stored
- unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- DstAddr = Builder.CreateBitCast(DstAddr,
- PointerType::get(SetValue->getType(), dstAS));
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
- LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
-
- LoopBuilder.CreateStore(
- SetValue,
- LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
- false);
-
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
-}
-
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
SmallVector<LoadInst *, 4> AggrLoads;
SmallVector<MemIntrinsic *, 4> MemCalls;
@@ -287,13 +106,13 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
- convertMemCpyToLoop(/* ConvertedInst */ SI,
- /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
- /* CopyLen */ CopyLen,
- /* SrcIsVolatile */ LI->isVolatile(),
- /* DstIsVolatile */ SI->isVolatile(),
- /* Context */ Context,
- /* Function F */ F);
+ createMemCpyLoop(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcAlign */ LI->getAlignment(),
+ /* DestAlign */ SI->getAlignment(),
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile());
SI->eraseFromParent();
LI->eraseFromParent();
@@ -302,31 +121,11 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
// Transform mem* intrinsic calls.
for (MemIntrinsic *MemCall : MemCalls) {
if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
- convertMemCpyToLoop(/* ConvertedInst */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* Context */ Context,
- /* Function F */ F);
+ expandMemCpyAsLoop(Memcpy);
} else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
- convertMemMoveToLoop(/* ConvertedInst */ Memmove,
- /* SrcAddr */ Memmove->getRawSource(),
- /* DstAddr */ Memmove->getRawDest(),
- /* CopyLen */ Memmove->getLength(),
- /* SrcIsVolatile */ Memmove->isVolatile(),
- /* DstIsVolatile */ Memmove->isVolatile(),
- /* Context */ Context,
- /* Function F */ F);
-
+ expandMemMoveAsLoop(Memmove);
} else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
- convertMemSetToLoop(/* ConvertedInst */ Memset,
- /* DstAddr */ Memset->getRawDest(),
- /* CopyLen */ Memset->getLength(),
- /* SetValue */ Memset->getValue(),
- /* Context */ Context,
- /* Function F */ F);
+ expandMemSetAsLoop(Memset);
}
MemCall->eraseFromParent();
}
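Note on the replaced helpers: the hand-rolled loop builders deleted above are superseded by the shared lowering utilities called in this hunk (createMemCpyLoop, expandMemCpyAsLoop, expandMemMoveAsLoop, expandMemSetAsLoop). A minimal sketch of the caller side, assuming the utilities are declared in llvm/Transforms/Utils/LowerMemIntrinsics.h at this revision; lowerAggrMemcpy is an illustrative name, and the single-argument signature is taken from the calls made in this patch:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

using namespace llvm;

// Sketch only: lower one memcpy intrinsic via the shared utility instead of a
// target-local loop builder.
static void lowerAggrMemcpy(MemCpyInst *MCI) {
  // Builds an explicit load/store loop in place of the intrinsic call.
  expandMemCpyAsLoop(MCI);
  // As in the code above, the intrinsic itself is erased by the caller.
  MCI->eraseFromParent();
}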
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 3f0c7be7863d..5b626cbcd5ba 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -159,7 +159,8 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
assert(PType && "Expecting pointer type in handleByValParam");
Type *StructType = PType->getElementType();
- AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+ unsigned AS = Func->getParent()->getDataLayout().getAllocaAddrSpace();
+ AllocaInst *AllocA = new AllocaInst(StructType, AS, Arg->getName(), FirstInst);
// Set the alignment to alignment of the byval parameter. This is because,
// later load/stores assume that alignment, and we are going to replace
// the use of the byval parameter with this alloca instruction.
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index eab5ee80561e..86a28f7d0700 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -27,6 +27,13 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
switch (Kind) {
default: llvm_unreachable("Invalid kind!");
+ case VK_NVPTX_HALF_PREC_FLOAT:
+ // ptxas does not have a way to specify half-precision floats.
+ // Instead, we have to print and load fp16 constants as .b16.
+ OS << "0x";
+ NumHex = 4;
+ APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+ break;
case VK_NVPTX_SINGLE_PREC_FLOAT:
OS << "0f";
NumHex = 8;
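The new half-precision case prints the raw 16-bit encoding with a plain "0x" prefix, since ptxas has no dedicated fp16 literal syntax like 0f/0d. A stand-alone approximation of that conversion, using the same APFloat call shown above; the printing helper and main() wrapper are illustrative, not the printer's actual code:

#include "llvm/ADT/APFloat.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  bool Ignored;
  APFloat APF(1.0f);
  APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
  // bitcastToAPInt() yields the 16-bit encoding; 1.0 is 0x3C00, so the
  // constant is emitted as "0x3C00" and loaded as an untyped .b16 value.
  outs() << "0x"
         << format_hex_no_prefix(APF.bitcastToAPInt().getZExtValue(), 4)
         << "\n";
  return 0;
}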
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 7f833c42fa8f..95741d9b0451 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -22,8 +22,9 @@ class NVPTXFloatMCExpr : public MCTargetExpr {
public:
enum VariantKind {
VK_NVPTX_None,
- VK_NVPTX_SINGLE_PREC_FLOAT, // FP constant in single-precision
- VK_NVPTX_DOUBLE_PREC_FLOAT // FP constant in double-precision
+ VK_NVPTX_HALF_PREC_FLOAT, // FP constant in half-precision
+ VK_NVPTX_SINGLE_PREC_FLOAT, // FP constant in single-precision
+ VK_NVPTX_DOUBLE_PREC_FLOAT // FP constant in double-precision
};
private:
@@ -40,6 +41,11 @@ public:
static const NVPTXFloatMCExpr *create(VariantKind Kind, const APFloat &Flt,
MCContext &Ctx);
+ static const NVPTXFloatMCExpr *createConstantFPHalf(const APFloat &Flt,
+ MCContext &Ctx) {
+ return create(VK_NVPTX_HALF_PREC_FLOAT, Flt, Ctx);
+ }
+
static const NVPTXFloatMCExpr *createConstantFPSingle(const APFloat &Flt,
MCContext &Ctx) {
return create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx);
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index 49e639793efc..e10b046f7c97 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -113,7 +113,7 @@ static void CombineCVTAToLocal(MachineInstr &Root) {
BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
Root.getOperand(0).getReg())
.addReg(NVPTX::VRFrameLocal)
- .addOperand(Prev.getOperand(2));
+ .add(Prev.getOperand(2));
MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6cbf0604d7ef..8d46694fbe50 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -27,12 +27,19 @@ using namespace llvm;
namespace llvm {
std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
- if (RC == &NVPTX::Float32RegsRegClass) {
+ if (RC == &NVPTX::Float32RegsRegClass)
return ".f32";
- }
- if (RC == &NVPTX::Float64RegsRegClass) {
+ if (RC == &NVPTX::Float16RegsRegClass)
+ // Ideally fp16 registers should be .f16, but this syntax is only
+ // supported on sm_53+. On the other hand, .b16 registers are
+ // accepted for all supported fp16 instructions on all GPU
+ // variants, so we can use them instead.
+ return ".b16";
+ if (RC == &NVPTX::Float16x2RegsRegClass)
+ return ".b32";
+ if (RC == &NVPTX::Float64RegsRegClass)
return ".f64";
- } else if (RC == &NVPTX::Int64RegsRegClass) {
+ if (RC == &NVPTX::Int64RegsRegClass)
// We use untyped (.b) integer registers here as NVCC does.
// Correctness of generated code does not depend on register type,
// but using .s/.u registers runs into a ptxas bug that prevents
@@ -52,40 +59,37 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
// add.f16v2 rb32,rb32,rb32; // OK
// add.f16v2 rs32,rs32,rs32; // OK
return ".b64";
- } else if (RC == &NVPTX::Int32RegsRegClass) {
+ if (RC == &NVPTX::Int32RegsRegClass)
return ".b32";
- } else if (RC == &NVPTX::Int16RegsRegClass) {
+ if (RC == &NVPTX::Int16RegsRegClass)
return ".b16";
- } else if (RC == &NVPTX::Int1RegsRegClass) {
+ if (RC == &NVPTX::Int1RegsRegClass)
return ".pred";
- } else if (RC == &NVPTX::SpecialRegsRegClass) {
+ if (RC == &NVPTX::SpecialRegsRegClass)
return "!Special!";
- } else {
- return "INTERNAL";
- }
- return "";
+ return "INTERNAL";
}
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
- if (RC == &NVPTX::Float32RegsRegClass) {
+ if (RC == &NVPTX::Float32RegsRegClass)
return "%f";
- }
- if (RC == &NVPTX::Float64RegsRegClass) {
+ if (RC == &NVPTX::Float16RegsRegClass)
+ return "%h";
+ if (RC == &NVPTX::Float16x2RegsRegClass)
+ return "%hh";
+ if (RC == &NVPTX::Float64RegsRegClass)
return "%fd";
- } else if (RC == &NVPTX::Int64RegsRegClass) {
+ if (RC == &NVPTX::Int64RegsRegClass)
return "%rd";
- } else if (RC == &NVPTX::Int32RegsRegClass) {
+ if (RC == &NVPTX::Int32RegsRegClass)
return "%r";
- } else if (RC == &NVPTX::Int16RegsRegClass) {
+ if (RC == &NVPTX::Int16RegsRegClass)
return "%rs";
- } else if (RC == &NVPTX::Int1RegsRegClass) {
+ if (RC == &NVPTX::Int1RegsRegClass)
return "%p";
- } else if (RC == &NVPTX::SpecialRegsRegClass) {
+ if (RC == &NVPTX::SpecialRegsRegClass)
return "!Special!";
- } else {
- return "INTERNAL";
- }
- return "";
+ return "INTERNAL";
}
}
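With the fp16 classes added, the two helpers now cover every NVPTX register class the asm printer declares. A hedged sketch of how they are typically combined when emitting PTX virtual-register declarations; declareRegs, the include, and the exact formatting are assumptions for illustration, not code from this patch:

#include "NVPTXRegisterInfo.h"
#include <string>

// Illustrative only: compose a PTX register declaration from the two helpers.
// For Float16Regs this yields something like "\t.reg .b16 %h<5>;\n".
static std::string declareRegs(const llvm::TargetRegisterClass *RC,
                               unsigned NumRegs) {
  return "\t.reg " + llvm::getNVPTXRegClassName(RC) + " " +
         llvm::getNVPTXRegClassStr(RC) + "<" + std::to_string(NumRegs) + ">;\n";
}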
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index ff6ccc457db7..f04764a9e9a3 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -36,6 +36,8 @@ foreach i = 0-4 in {
def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
def R#i : NVPTXReg<"%r"#i>; // 32-bit
def RL#i : NVPTXReg<"%rd"#i>; // 64-bit
+ def H#i : NVPTXReg<"%h"#i>; // 16-bit float
+ def HH#i : NVPTXReg<"%hh"#i>; // 2x16-bit float
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
def FL#i : NVPTXReg<"%fd"#i>; // 64-bit float
@@ -57,6 +59,8 @@ def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>;
def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4))>;
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4))>;
+def Float16Regs : NVPTXRegClass<[f16], 16, (add (sequence "H%u", 0, 4))>;
+def Float16x2Regs : NVPTXRegClass<[v2f16], 32, (add (sequence "HH%u", 0, 4))>;
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>;
def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index b0472de980fc..d736eaa41301 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -31,7 +31,7 @@ public:
/// Override this as NVPTX has its own way of printing switching
/// to a section.
- void PrintSwitchToSection(const MCAsmInfo &MAI,
+ void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override {}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 6e1f427ed021..acbee86ae386 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -23,6 +23,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "NVPTXGenSubtargetInfo.inc"
+static cl::opt<bool>
+ NoF16Math("nvptx-no-f16-math", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
+ cl::init(false));
+
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
@@ -57,3 +62,7 @@ bool NVPTXSubtarget::hasImageHandles() const {
// Disabled, otherwise
return false;
}
+
+bool NVPTXSubtarget::allowFP16Math() const {
+ return hasFP16Math() && NoF16Math == false;
+}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index da020a94bcdd..96618cf46373 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -101,6 +101,8 @@ public:
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
bool hasImageHandles() const;
+ bool hasFP16Math() const { return SmVersion >= 53; }
+ bool allowFP16Math() const;
unsigned int getSmVersion() const { return SmVersion; }
std::string getTargetName() const { return TargetName; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index eb357e0a4d50..ab5298d0dcfd 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
@@ -50,7 +51,6 @@ void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
-void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
@@ -70,7 +70,6 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeGenericToNVVMPass(PR);
initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);
- initializeNVPTXInferAddressSpacesPass(PR);
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
@@ -167,9 +166,13 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return new NVPTXPassConfig(this, PM);
}
-void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
- PM.add(createNVVMReflectPass());
- PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
+void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ Builder.addExtension(
+ PassManagerBuilder::EP_EarlyAsPossible,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createNVVMReflectPass());
+ PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
+ });
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@@ -190,7 +193,7 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
// be eliminated by SROA.
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
- addPass(createNVPTXInferAddressSpacesPass());
+ addPass(createInferAddressSpacesPass());
}
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
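The addEarlyAsPossiblePasses hook is replaced by the generic adjustPassManager hook, so the NVVM passes are now injected through a PassManagerBuilder extension point rather than added directly. A minimal sketch of the driver side that consumes the hook; buildNVPTXPasses is an illustrative name:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

// Illustrative driver-side sketch: any frontend that calls adjustPassManager
// before populating its pipeline now gets NVVMReflect and NVVMIntrRange
// scheduled at the EP_EarlyAsPossible extension point.
void buildNVPTXPasses(llvm::TargetMachine &TM, llvm::legacy::PassManager &MPM) {
  llvm::PassManagerBuilder Builder;
  TM.adjustPassManager(Builder);
  Builder.populateModulePassManager(MPM);
}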
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 78a053831772..1ed8e3b1e935 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -61,7 +61,8 @@ public:
return TLOF.get();
}
- void addEarlyAsPossiblePasses(PassManagerBase &PM) override;
+ void adjustPassManager(PassManagerBuilder &) override;
+
TargetIRAnalysis getTargetIRAnalysis() override;
}; // NVPTXTargetMachine.
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index b6c271ae4cbc..03075b550429 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -45,6 +45,10 @@ public:
bool isSourceOfDivergence(const Value *V);
+ unsigned getFlatAddressSpace() const {
+ return AddressSpace::ADDRESS_SPACE_GENERIC;
+ }
+
// Increase the inlining cost threshold by a factor of 5, reflecting that
// calls are particularly expensive in NVPTX.
unsigned getInliningThresholdMultiplier() { return 5; }
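Exposing getFlatAddressSpace here is what lets the target-independent InferAddressSpaces pass (substituted for the NVPTX-specific pass elsewhere in this patch) treat the generic address space as the one to narrow. A minimal sketch of the query; flatAS is an illustrative name:

#include "llvm/Analysis/TargetTransformInfo.h"

// Illustrative: querying the flat (generic) address space through TTI.
// For NVPTX this returns ADDRESS_SPACE_GENERIC (0); targets that report no
// flat address space leave InferAddressSpaces as a no-op.
unsigned flatAS(const llvm::TargetTransformInfo &TTI) {
  return TTI.getFlatAddressSpace();
}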
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index c639c4dc0683..152b665d0fdc 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -10,11 +10,10 @@
// This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect
// with an integer.
//
-// We choose the value we use by looking, in this order, at:
-//
-// * the -nvvm-reflect-list flag, which has the format "foo=1,bar=42",
-// * the StringMap passed to the pass's constructor, and
-// * metadata in the module itself.
+// We choose the value we use by looking at metadata in the module itself. Note
+// that we intentionally only have one way to choose these values, because other
+// parts of LLVM (particularly, InstCombineCall) rely on being able to predict
+// the values chosen by this pass.
//
// If we see an unknown string, we replace its call with 0.
//
@@ -49,30 +48,17 @@ namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
namespace {
class NVVMReflect : public FunctionPass {
-private:
- StringMap<int> VarMap;
-
public:
static char ID;
- NVVMReflect() : NVVMReflect(StringMap<int>()) {}
-
- NVVMReflect(const StringMap<int> &Mapping)
- : FunctionPass(ID), VarMap(Mapping) {
+ NVVMReflect() : FunctionPass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- setVarMap();
}
bool runOnFunction(Function &) override;
-
-private:
- void setVarMap();
};
}
FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
-FunctionPass *llvm::createNVVMReflectPass(const StringMap<int> &Mapping) {
- return new NVVMReflect(Mapping);
-}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -83,35 +69,6 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
false)
-static cl::list<std::string>
-ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
- cl::desc("A list of string=num assignments"),
- cl::ValueRequired);
-
-/// The command line can look as follows :
-/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
-/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
-/// ReflectList vector. First, each of ReflectList[i] is 'split'
-/// using "," as the delimiter. Then each of this part is split
-/// using "=" as the delimiter.
-void NVVMReflect::setVarMap() {
- for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
- DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
- SmallVector<StringRef, 4> NameValList;
- StringRef(ReflectList[i]).split(NameValList, ',');
- for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
- SmallVector<StringRef, 2> NameValPair;
- NameValList[j].split(NameValPair, '=');
- assert(NameValPair.size() == 2 && "name=val expected");
- std::stringstream ValStream(NameValPair[1]);
- int Val;
- ValStream >> Val;
- assert((!(ValStream.fail())) && "integer value expected");
- VarMap[NameValPair[0]] = Val;
- }
- }
-}
-
bool NVVMReflect::runOnFunction(Function &F) {
if (!NVVMReflectEnabled)
return false;
@@ -199,11 +156,10 @@ bool NVVMReflect::runOnFunction(Function &F) {
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
int ReflectVal = 0; // The default value is 0
- auto Iter = VarMap.find(ReflectArg);
- if (Iter != VarMap.end())
- ReflectVal = Iter->second;
- else if (ReflectArg == "__CUDA_FTZ") {
- // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.
+ if (ReflectArg == "__CUDA_FTZ") {
+ // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag. Our
+ // choice here must be kept in sync with AutoUpgrade, which uses the same
+ // technique to detect whether ftz is enabled.
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();
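With the command-line list and constructor map removed, the only remaining source for __CUDA_FTZ is the nvvm-reflect-ftz module flag, read the same way AutoUpgrade reads it. A stand-alone sketch of that lookup; reflectFTZ is an illustrative name, and the metadata in the comment is the shape a CUDA frontend typically emits:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Illustrative: the module-flag lookup this pass now relies on. A module
// built with ftz enabled typically carries
//   !llvm.module.flags = !{!0}
//   !0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
static int reflectFTZ(const llvm::Module &M) {
  if (auto *Flag = llvm::mdconst::extract_or_null<llvm::ConstantInt>(
          M.getModuleFlag("nvvm-reflect-ftz")))
    return Flag->getSExtValue();
  return 0; // default when the flag is absent
}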
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 4842c3b7a656..7ca4c1999003 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -40,6 +40,7 @@ add_llvm_target(PowerPCCodeGen
PPCVSXCopy.cpp
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
+ PPCExpandISEL.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 5847b3a52bfc..4863ac542736 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -114,7 +114,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 017d21af08a8..a00b56af0490 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -11,22 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -34,10 +40,8 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
-class PPCMCCodeEmitter : public MCCodeEmitter {
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
- void operator=(const PPCMCCodeEmitter &) = delete;
+class PPCMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -46,8 +50,9 @@ public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), CTX(ctx),
IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
-
- ~PPCMCCodeEmitter() override {}
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+ ~PPCMCCodeEmitter() override = default;
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -103,6 +108,7 @@ public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override {
@@ -137,7 +143,7 @@ public:
}
break;
default:
- llvm_unreachable ("Invalid instruction size");
+ llvm_unreachable("Invalid instruction size");
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
@@ -238,7 +244,6 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return RegBits;
}
-
unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -286,7 +291,6 @@ unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -302,7 +306,6 @@ unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -318,7 +321,6 @@ unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -383,7 +385,5 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
-
-
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index bbd10e5b260f..2d686f227919 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -11,22 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "PPCMCAsmInfo.h"
+#include "MCTargetDesc/PPCMCAsmInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPCTargetStreamer.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -41,9 +48,10 @@ using namespace llvm;
#include "PPCGenRegisterInfo.inc"
// Pin the vtable to this file.
-PPCTargetStreamer::~PPCTargetStreamer() {}
PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+PPCTargetStreamer::~PPCTargetStreamer() = default;
+
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitPPCMCInstrInfo(X);
@@ -96,12 +104,14 @@ static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
}
namespace {
+
class PPCTargetAsmStreamer : public PPCTargetStreamer {
formatted_raw_ostream &OS;
public:
PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
: PPCTargetStreamer(S), OS(OS) {}
+
void emitTCEntry(const MCSymbol &S) override {
OS << "\t.tc ";
OS << S.getName();
@@ -109,12 +119,15 @@ public:
OS << S.getName();
OS << '\n';
}
+
void emitMachine(StringRef CPU) override {
OS << "\t.machine " << CPU << '\n';
}
+
void emitAbiVersion(int AbiVersion) override {
OS << "\t.abiversion " << AbiVersion << '\n';
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
@@ -129,18 +142,22 @@ public:
class PPCTargetELFStreamer : public PPCTargetStreamer {
public:
PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
MCELFStreamer &getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
+
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
+
void emitMachine(StringRef CPU) override {
// FIXME: Is there anything to do in here or does this directive only
// limit the parser?
}
+
void emitAbiVersion(int AbiVersion) override {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned Flags = MCA.getELFHeaderEFlags();
@@ -148,6 +165,7 @@ public:
Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
MCA.setELFHeaderEFlags(Flags);
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
@@ -170,6 +188,7 @@ public:
if ((Flags & ELF::EF_PPC64_ABI) == 0)
MCA.setELFHeaderEFlags(Flags | 2);
}
+
void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
auto *Symbol = cast<MCSymbolELF>(S);
// When encoding an assignment to set symbol A to symbol B, also copy
@@ -188,21 +207,26 @@ public:
class PPCTargetMachOStreamer : public PPCTargetStreamer {
public:
PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
void emitTCEntry(const MCSymbol &S) override {
llvm_unreachable("Unknown pseudo-op: .tc");
}
+
void emitMachine(StringRef CPU) override {
// FIXME: We should update the CPUType, CPUSubType in the Object file if
// the new values are different from the defaults.
}
+
void emitAbiVersion(int AbiVersion) override {
llvm_unreachable("Unknown pseudo-op: .abiversion");
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
llvm_unreachable("Unknown pseudo-op: .localentry");
}
};
-}
+
+} // end anonymous namespace
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 0989e0c8e268..893233ee2300 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -17,23 +17,22 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
+#include <cstdint>
namespace llvm {
+
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
-class MCSubtargetInfo;
class MCTargetOptions;
class Target;
class Triple;
class StringRef;
class raw_pwrite_stream;
-class raw_ostream;
Target &getThePPC32Target();
Target &getThePPC64Target();
@@ -83,7 +82,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
-} // End llvm namespace
+} // end namespace llvm
// Generated files will use "namespace PPC". To avoid symbol clash,
// undefine PPC here. PPC may be predefined on some hosts.
@@ -103,4 +102,4 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
-#endif
+#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index e01f49dce81e..38ae62b26757 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -45,11 +45,13 @@ namespace llvm {
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
+ FunctionPass *createPPCExpandISELPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
+ void initializePPCExpandISELPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f0e0ebc4946c..1f181d007f63 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -112,7 +112,9 @@ public:
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
- return AsmPrinter::runOnMachineFunction(MF);
+ bool Changed = AsmPrinter::runOnMachineFunction(MF);
+ emitXRayTable();
+ return Changed;
}
};
@@ -134,6 +136,7 @@ public:
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
+ void EmitInstruction(const MachineInstr *MI) override;
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@@ -402,7 +405,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
.addImm(CallTarget & 0xFFFF));
// Save the current TOC pointer before the remote call.
- int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
+ int TOCSaveOffset = Subtarget->getFrameLowering()->getTOCSaveOffset();
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
@@ -1046,6 +1049,97 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (!Subtarget->isPPC64())
+ return PPCAsmPrinter::EmitInstruction(MI);
+
+ switch (MI->getOpcode()) {
+ default:
+ return PPCAsmPrinter::EmitInstruction(MI);
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
+ // .begin:
+ // b .end # lis 0, FuncId[16..32]
+ // nop # li 0, FuncId[0..15]
+ // std 0, -8(1)
+ // mflr 0
+ // bl __xray_FunctionEntry
+ // mtlr 0
+ // .end:
+ //
+ // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when the
+ // number of instructions changes.
+ MCSymbol *BeginOfSled = OutContext.createTempSymbol();
+ MCSymbol *EndOfSled = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(BeginOfSled);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::B).addExpr(
+ MCSymbolRefExpr::create(EndOfSled, OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL8_NOP)
+ .addExpr(MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol("__xray_FunctionEntry"),
+ OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
+ OutStreamer->EmitLabel(EndOfSled);
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER);
+ break;
+ }
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT: {
+ // .p2align 3
+ // .begin:
+ // b(lr)? # lis 0, FuncId[16..32]
+ // nop # li 0, FuncId[0..15]
+ // std 0, -8(1)
+ // mflr 0
+ // bl __xray_FunctionExit
+ // mtlr 0
+ // .end:
+ // b(lr)?
+ //
+ // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when the
+ // number of instructions changes.
+ const MachineInstr *Next = [&] {
+ MachineBasicBlock::const_iterator It(MI);
+ assert(It != MI->getParent()->end());
+ ++It;
+ assert(It->isReturn());
+ return &*It;
+ }();
+ OutStreamer->EmitCodeAlignment(8);
+ MCSymbol *BeginOfSled = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(BeginOfSled);
+ MCInst TmpInst;
+ LowerPPCMachineInstrToMCInst(Next, TmpInst, *this, false);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL8_NOP)
+ .addExpr(MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol("__xray_FunctionExit"),
+ OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT);
+ break;
+ }
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ case TargetOpcode::PATCHABLE_RET:
+ // PPC's tail call instruction, e.g. PPC::TCRETURNdi8, doesn't really
+ // lower to a PPC::B instruction. The PPC::B instruction is generated
+ // before it, and handled by the normal case.
+ llvm_unreachable("Tail call is handled in the normal case. See comments"
+ "around this assert.");
+ }
+}
+
void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index ae76386fdfb6..b7d3154d0000 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -78,7 +78,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes.resize(Fn.getNumBlockIDs());
auto GetAlignmentAdjustment =
- [TII](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
+ [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
unsigned Align = MBB.getAlignment();
if (!Align)
return 0;
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2c62a0f1d909..70c4170653ae 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -298,15 +298,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- case Intrinsic::round: Opcode = ISD::FROUND; break;
- case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
- case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
+ case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
}
}
@@ -315,7 +317,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// (i.e. soft float or atomics). If adapting for targets that do,
// additional care is required here.
- LibFunc::Func Func;
+ LibFunc Func;
if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
LibInfo->getLibFunc(F->getName(), Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
@@ -329,50 +331,50 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
switch (Func) {
default: return true;
- case LibFunc::copysign:
- case LibFunc::copysignf:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc::copysignl:
+ case LibFunc_copysignl:
return true;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
continue; // ISD::FABS is never a library call.
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
Opcode = ISD::FSQRT; break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
Opcode = ISD::FFLOOR; break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
Opcode = ISD::FNEARBYINT; break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
Opcode = ISD::FCEIL; break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
Opcode = ISD::FRINT; break;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
Opcode = ISD::FROUND; break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
Opcode = ISD::FTRUNC; break;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
Opcode = ISD::FMINNUM; break;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
Opcode = ISD::FMAXNUM; break;
}
}
diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp
new file mode 100644
index 000000000000..ebd414baf1d2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -0,0 +1,458 @@
+//===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that expands the ISEL instruction into an if-then-else sequence.
+// This pass must be run post-RA since all operands must be physical registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-expand-isel"
+
+STATISTIC(NumExpanded, "Number of ISEL instructions expanded");
+STATISTIC(NumRemoved, "Number of ISEL instructions removed");
+STATISTIC(NumFolded, "Number of ISEL instructions folded");
+
+// If -ppc-gen-isel=false is set, generation of the ISEL instruction is
+// disabled on all PPC targets. Otherwise, if the user passes -misel or the
+// platform supports ISEL by default, the ISEL instruction is still
+// generated; otherwise it is expanded.
+static cl::opt<bool>
+ GenerateISEL("ppc-gen-isel",
+ cl::desc("Enable generating the ISEL instruction."),
+ cl::init(true), cl::Hidden);
+
+namespace {
+class PPCExpandISEL : public MachineFunctionPass {
+ DebugLoc dl;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ bool IsTrueBlockRequired;
+ bool IsFalseBlockRequired;
+ MachineBasicBlock *TrueBlock;
+ MachineBasicBlock *FalseBlock;
+ MachineBasicBlock *NewSuccessor;
+ MachineBasicBlock::iterator TrueBlockI;
+ MachineBasicBlock::iterator FalseBlockI;
+
+ typedef SmallVector<MachineInstr *, 4> BlockISELList;
+ typedef SmallDenseMap<int, BlockISELList> ISELInstructionList;
+
+ // A map of MBB numbers to their lists of contained ISEL instructions.
+ ISELInstructionList ISELInstructions;
+
+ /// Initialize the object.
+ void initialize(MachineFunction &MFParam);
+
+ void handleSpecialCases(BlockISELList &BIL, MachineBasicBlock *MBB);
+ void reorganizeBlockLayout(BlockISELList &BIL, MachineBasicBlock *MBB);
+ void populateBlocks(BlockISELList &BIL);
+ void expandMergeableISELs(BlockISELList &BIL);
+ void expandAndMergeISELs();
+
+ bool canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI);
+
+ /// Is this instruction an ISEL or ISEL8?
+ static bool isISEL(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::ISEL || MI.getOpcode() == PPC::ISEL8);
+ }
+
+ /// Is this instruction an ISEL8?
+ static bool isISEL8(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::ISEL8);
+ }
+
+ /// Are the two operands using the same register?
+ bool useSameRegister(const MachineOperand &Op1, const MachineOperand &Op2) {
+ return (Op1.getReg() == Op2.getReg());
+ }
+
+ ///
+ /// Collect all ISEL instructions from the current function.
+ ///
+ /// Walk the current function and collect all the ISEL instructions that are
+ /// found. The instructions are placed in the ISELInstructions map.
+ ///
+ /// \return true if any ISEL instructions were found, false otherwise
+ ///
+ bool collectISELInstructions();
+
+public:
+ static char ID;
+ PPCExpandISEL() : MachineFunctionPass(ID) {
+ initializePPCExpandISELPass(*PassRegistry::getPassRegistry());
+ }
+
+ ///
+ /// Determine whether to generate the ISEL instruction or expand it.
+ ///
+ /// Expand the ISEL instruction into an if-then-else sequence when one of
+ /// the following two conditions holds:
+ /// (1) -ppc-gen-isel=false
+ /// (2) hasISEL() returns false
+ /// Otherwise, the ISEL instruction is still generated.
+ /// The -ppc-gen-isel option defaults to true, which means the ISEL
+ /// instruction is still generated by default on targets that support it.
+ ///
+ /// \return true if ISEL should be expanded into an if-then-else code sequence;
+ /// false if the ISEL instruction should be generated, i.e. not expanded.
+ ///
+ static bool isExpandISELEnabled(const MachineFunction &MF);
+
+#ifndef NDEBUG
+ void DumpISELInstructions() const;
+#endif
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!isExpandISELEnabled(MF))
+ return false;
+
+ DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
+ initialize(MF);
+
+ if (!collectISELInstructions()) {
+ DEBUG(dbgs() << "No ISEL instructions in this function\n");
+ return false;
+ }
+
+#ifndef NDEBUG
+ DumpISELInstructions();
+#endif
+
+ expandAndMergeISELs();
+
+ return true;
+ }
+};
+} // end anonymous namespace
+
+void PPCExpandISEL::initialize(MachineFunction &MFParam) {
+ MF = &MFParam;
+ TII = MF->getSubtarget().getInstrInfo();
+ ISELInstructions.clear();
+}
+
+bool PPCExpandISEL::isExpandISELEnabled(const MachineFunction &MF) {
+ return !GenerateISEL || !MF.getSubtarget<PPCSubtarget>().hasISEL();
+}
+
+bool PPCExpandISEL::collectISELInstructions() {
+ for (MachineBasicBlock &MBB : *MF) {
+ BlockISELList thisBlockISELs;
+ for (MachineInstr &MI : MBB)
+ if (isISEL(MI))
+ thisBlockISELs.push_back(&MI);
+ if (!thisBlockISELs.empty())
+ ISELInstructions.insert(std::make_pair(MBB.getNumber(), thisBlockISELs));
+ }
+ return !ISELInstructions.empty();
+}
+
+#ifndef NDEBUG
+void PPCExpandISEL::DumpISELInstructions() const {
+ for (const auto &I : ISELInstructions) {
+ DEBUG(dbgs() << "BB#" << I.first << ":\n");
+ for (const auto &VI : I.second)
+ DEBUG(dbgs() << " "; VI->print(dbgs()));
+ }
+}
+#endif
+
+/// Contiguous ISELs that have the same condition can be merged.
+bool PPCExpandISEL::canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI) {
+ // Same Condition Register?
+ if (!useSameRegister(PrevPushedMI->getOperand(3), MI->getOperand(3)))
+ return false;
+
+ MachineBasicBlock::iterator PrevPushedMBBI = *PrevPushedMI;
+ MachineBasicBlock::iterator MBBI = *MI;
+ return (std::prev(MBBI) == PrevPushedMBBI); // Contiguous ISELs?
+}
+
+void PPCExpandISEL::expandAndMergeISELs() {
+ for (auto &BlockList : ISELInstructions) {
+ DEBUG(dbgs() << "Expanding ISEL instructions in BB#" << BlockList.first
+ << "\n");
+
+ BlockISELList &CurrentISELList = BlockList.second;
+ auto I = CurrentISELList.begin();
+ auto E = CurrentISELList.end();
+
+ while (I != E) {
+ BlockISELList SubISELList;
+
+ SubISELList.push_back(*I++);
+
+ // Collect the ISELs that can be merged together.
+ while (I != E && canMerge(SubISELList.back(), *I))
+ SubISELList.push_back(*I++);
+
+ expandMergeableISELs(SubISELList);
+ }
+ }
+}
+
+void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL,
+ MachineBasicBlock *MBB) {
+ IsTrueBlockRequired = false;
+ IsFalseBlockRequired = false;
+
+ auto MI = BIL.begin();
+ while (MI != BIL.end()) {
+ assert(isISEL(**MI) && "Expecting an ISEL instruction");
+ DEBUG(dbgs() << "ISEL: " << **MI << "\n");
+
+ MachineOperand &Dest = (*MI)->getOperand(0);
+ MachineOperand &TrueValue = (*MI)->getOperand(1);
+ MachineOperand &FalseValue = (*MI)->getOperand(2);
+
+ // If at least one of the ISEL instructions satisfies the following
+ // condition, we need the True Block:
+ // The Dest Register and True Value Register are not the same.
+ // Similarly, if at least one of the ISEL instructions satisfies the
+ // following condition, we need the False Block:
+ // The Dest Register and False Value Register are not the same.
+
+ bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
+ bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
+
+ // Special case 1, all registers used by ISEL are the same one.
+ if (!IsADDIInstRequired && !IsORIInstRequired) {
+ DEBUG(dbgs() << "Remove redudant ISEL instruction.");
+ NumRemoved++;
+ (*MI)->eraseFromParent();
+ // Setting MI to the erase result keeps the iterator valid and advanced.
+ MI = BIL.erase(MI);
+ continue;
+ }
+
+ // Special case 2, the two input registers used by ISEL are the same.
+ // Note 1: We favor merging ISEL expansions over folding a single one. If
+ // the passed list has multiple mergeable ISELs, we won't fold any.
+ // Note 2: There is no need to test for PPC::R0/PPC::X0 because PPC::ZERO/
+ // PPC::ZERO8 will be used for the first operand if the value is meant to
+ // be zero. In this case, the useSameRegister method will return false,
+ // thereby preventing this ISEL from being folded.
+
+ if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) {
+ DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy.");
+ NumFolded++;
+ BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI))
+ .add(Dest)
+ .add(TrueValue)
+ .add(MachineOperand::CreateImm(0));
+ (*MI)->eraseFromParent();
+ // Setting MI to the erase result keeps the iterator valid and advanced.
+ MI = BIL.erase(MI);
+ continue;
+ }
+
+ IsTrueBlockRequired |= IsADDIInstRequired;
+ IsFalseBlockRequired |= IsORIInstRequired;
+ MI++;
+ }
+}
+
+void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
+ MachineBasicBlock *MBB) {
+ if (BIL.empty())
+ return;
+
+ assert((IsTrueBlockRequired || IsFalseBlockRequired) &&
+ "Should have been handled by special cases earlier!");
+
+ MachineBasicBlock *Successor = nullptr;
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineBasicBlock::iterator MBBI = (*BIL.back());
+ NewSuccessor = (MBBI != MBB->getLastNonDebugInstr() || !MBB->canFallThrough())
+ // Another BB is needed to move the instructions that
+ // follow this ISEL. If the ISEL is the last instruction
+ // in a block that can't fall through, we also need a block
+ // to branch to.
+ ? MF->CreateMachineBasicBlock(LLVM_BB)
+ : nullptr;
+
+ MachineFunction::iterator It = MBB->getIterator();
+ ++It; // Point to the successor block of MBB.
+
+ // If NewSuccessor is NULL then the last ISEL in this group is the last
+ // non-debug instruction in this block. Find the fall-through successor
+ // of this block to use when updating the CFG below.
+ if (!NewSuccessor) {
+ for (auto &Succ : MBB->successors()) {
+ if (MBB->isLayoutSuccessor(Succ)) {
+ Successor = Succ;
+ break;
+ }
+ }
+ } else
+ Successor = NewSuccessor;
+
+ // The FalseBlock and TrueBlock are inserted after the MBB block but before
+ // its successor.
+ // Note this needs to be done *after* the code above that sets Successor.
+ if (IsFalseBlockRequired) {
+ FalseBlock = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, FalseBlock);
+ }
+
+ if (IsTrueBlockRequired) {
+ TrueBlock = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBlock);
+ }
+
+ if (NewSuccessor) {
+ MF->insert(It, NewSuccessor);
+
+ // Transfer the rest of this block into the new successor block.
+ NewSuccessor->splice(NewSuccessor->end(), MBB,
+ std::next(MachineBasicBlock::iterator(BIL.back())),
+ MBB->end());
+ NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Copy the original liveIns of MBB to NewSuccessor.
+ for (auto &LI : MBB->liveins())
+ NewSuccessor->addLiveIn(LI);
+
+ // After splitting the NewSuccessor block, Regs defined but not killed
+ // in MBB should be treated as liveins of NewSuccessor.
+ // Note: Cannot use stepBackward instead since we are using the Reg
+ // liveness state at the end of MBB (liveOut of MBB) as the liveIn for
+ // NewSuccessor. Otherwise, it would cause a cyclic dependence.
+ LivePhysRegs LPR(MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
+ SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
+ for (MachineInstr &MI : *MBB)
+ LPR.stepForward(MI, Clobbers);
+ for (auto &LI : LPR)
+ NewSuccessor->addLiveIn(LI);
+ } else {
+ // Remove successor from MBB.
+ MBB->removeSuccessor(Successor);
+ }
+
+ // Note that this needs to be done *after* transferring the successors from MBB
+ // to the NewSuccessor block, otherwise these blocks will also be transferred
+ // as successors!
+ MBB->addSuccessor(IsTrueBlockRequired ? TrueBlock : Successor);
+ MBB->addSuccessor(IsFalseBlockRequired ? FalseBlock : Successor);
+
+ if (IsTrueBlockRequired) {
+ TrueBlockI = TrueBlock->begin();
+ TrueBlock->addSuccessor(Successor);
+ }
+
+ if (IsFalseBlockRequired) {
+ FalseBlockI = FalseBlock->begin();
+ FalseBlock->addSuccessor(Successor);
+ }
+
+ // Conditional branch to the TrueBlock or Successor
+ BuildMI(*MBB, BIL.back(), dl, TII->get(PPC::BC))
+ .add(BIL.back()->getOperand(3))
+ .addMBB(IsTrueBlockRequired ? TrueBlock : Successor);
+
+ // Jump over the true block to the new successor if the condition is false.
+ BuildMI(*(IsFalseBlockRequired ? FalseBlock : MBB),
+ (IsFalseBlockRequired ? FalseBlockI : BIL.back()), dl,
+ TII->get(PPC::B))
+ .addMBB(Successor);
+
+ if (IsFalseBlockRequired)
+ FalseBlockI = FalseBlock->begin(); // get the position of PPC::B
+}
+
+void PPCExpandISEL::populateBlocks(BlockISELList &BIL) {
+ for (auto &MI : BIL) {
+ assert(isISEL(*MI) && "Expecting an ISEL instruction");
+
+ MachineOperand &Dest = MI->getOperand(0); // location to store to
+ MachineOperand &TrueValue = MI->getOperand(1); // Value to store if
+ // condition is true
+ MachineOperand &FalseValue = MI->getOperand(2); // Value to store if
+ // condition is false
+ MachineOperand &ConditionRegister = MI->getOperand(3); // Condition
+
+ DEBUG(dbgs() << "Dest: " << Dest << "\n");
+ DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n");
+ DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n");
+ DEBUG(dbgs() << "ConditionRegister: " << ConditionRegister << "\n");
+
+ // If the Dest Register and True Value Register are not the same one, we
+ // need the True Block.
+ bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
+ bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
+
+ if (IsADDIInstRequired) {
+ // Copy the result into the destination if the condition is true.
+ BuildMI(*TrueBlock, TrueBlockI, dl,
+ TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI))
+ .add(Dest)
+ .add(TrueValue)
+ .add(MachineOperand::CreateImm(0));
+
+ // Add the LiveIn registers required by true block.
+ TrueBlock->addLiveIn(TrueValue.getReg());
+ }
+
+ if (IsORIInstRequired) {
+ // Add the LiveIn registers required by false block.
+ FalseBlock->addLiveIn(FalseValue.getReg());
+ }
+
+ if (NewSuccessor) {
+ // Add the LiveIn registers required by NewSuccessor block.
+ NewSuccessor->addLiveIn(Dest.getReg());
+ NewSuccessor->addLiveIn(TrueValue.getReg());
+ NewSuccessor->addLiveIn(FalseValue.getReg());
+ NewSuccessor->addLiveIn(ConditionRegister.getReg());
+ }
+
+ // Copy the value into the destination if the condition is false.
+ if (IsORIInstRequired)
+ BuildMI(*FalseBlock, FalseBlockI, dl,
+ TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI))
+ .add(Dest)
+ .add(FalseValue)
+ .add(MachineOperand::CreateImm(0));
+
+ MI->eraseFromParent(); // Remove the ISEL instruction.
+
+ NumExpanded++;
+ }
+}
+
+void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) {
+ // At this stage all the ISELs of BIL are in the same MBB.
+ MachineBasicBlock *MBB = BIL.back()->getParent();
+
+ handleSpecialCases(BIL, MBB);
+ reorganizeBlockLayout(BIL, MBB);
+ populateBlocks(BIL);
+}
+
+INITIALIZE_PASS(PPCExpandISEL, DEBUG_TYPE, "PowerPC Expand ISEL Generation",
+ false, false)
+char PPCExpandISEL::ID = 0;
+
+FunctionPass *llvm::createPPCExpandISELPass() { return new PPCExpandISEL(); }
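For reference, the transformation this new pass performs, reduced to scalar C++ terms: isel rD, rA, rB, cond copies rA into rD when the condition bit is set and rB otherwise, and the pass materializes that choice as an explicit branch diamond on subtargets without ISEL. This sketch shows only the semantics; it is not code from the pass:

// Semantics of one expanded ISEL. The pass emits a BC on the condition
// register bit, an ADDI copy in the "true" block and an ORI copy on the
// fall-through ("false") path, then control rejoins in the new successor.
unsigned expandedISEL(bool ConditionBit, unsigned RA, unsigned RB) {
  unsigned RD;
  if (ConditionBit)
    RD = RA; // true block: ADDI rD, rA, 0
  else
    RD = RB; // false path: ORI rD, rB, 0
  return RD; // control rejoins in NewSuccessor
}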
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index e786ef9aee0e..4c9430a2eca0 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -433,8 +433,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -519,8 +518,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
@@ -616,8 +614,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
return true;
// Get the list of callee-saved registers for the target.
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
// Get all the available registers in the block.
@@ -663,8 +660,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
// and the stack frame is large, we need two scratch registers.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned FrameSize = determineFrameLayout(MF, false);
@@ -694,10 +690,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo &MFI = MF.getFrameInfo();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -1221,10 +1215,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Get alignment info so we know how to restore the SP.
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1550,8 +1542,7 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
// Create branch instruction for pseudo tail call return instruction
unsigned RetOpcode = MBBI->getOpcode();
@@ -1589,8 +1580,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1793,8 +1783,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->hasBasePointer(MF)) {
HasGPSaveArea = true;
@@ -1941,8 +1930,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -2083,8 +2071,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
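The static_casts removed throughout this file become unnecessary once PPCSubtarget's accessors return the PPC-specific types directly, e.g. via covariant overrides of the base subtarget hooks. A minimal sketch of that C++ mechanism, with class names chosen purely for illustration (assumed, not the actual LLVM class layout):

#include <cassert>

struct RegInfoBase {
  virtual ~RegInfoBase() = default;
};

struct PPCRegInfo : RegInfoBase {
  bool hasBasePointer() const { return true; }
};

struct SubtargetBase {
  virtual ~SubtargetBase() = default;
  virtual const RegInfoBase *getRegisterInfo() const = 0;
};

struct PPCSubtargetModel : SubtargetBase {
  // Covariant return type: the override may return a pointer to the derived
  // register-info class, so callers that hold the concrete subtarget need no
  // static_cast.
  const PPCRegInfo *getRegisterInfo() const override { return &RI; }
  PPCRegInfo RI;
};

int main() {
  PPCSubtargetModel Subtarget;
  const PPCRegInfo *RegInfo = Subtarget.getRegisterInfo(); // no cast needed
  assert(RegInfo->hasBasePointer());
  return 0;
}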
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 1e51c1f651c9..9c72638023bb 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -12,30 +12,57 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <new>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "ppc-codegen"
@@ -60,6 +87,7 @@ static cl::opt<bool> EnableBranchHint(
cl::Hidden);
namespace {
+
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
@@ -69,6 +97,7 @@ namespace {
const PPCSubtarget *PPCSubTarget;
const PPCTargetLowering *PPCLowering;
unsigned GlobalBaseReg;
+
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm) {}
@@ -184,7 +213,6 @@ namespace {
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
-
switch(ConstraintID) {
default:
errs() << "ConstraintID: " << ConstraintID << "\n";
@@ -237,7 +265,8 @@ private:
void transferMemOperands(SDNode *N, SDNode *Result);
};
-}
+
+} // end anonymous namespace
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
@@ -303,7 +332,6 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
}
}
-
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
@@ -368,7 +396,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
-
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
@@ -833,6 +860,7 @@ static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) {
}
namespace {
+
class BitPermutationSelector {
struct ValueBit {
SDValue V;
@@ -898,14 +926,12 @@ class BitPermutationSelector {
// associated with each) used to choose the lowering method.
struct ValueRotInfo {
SDValue V;
- unsigned RLAmt;
- unsigned NumGroups;
- unsigned FirstGroupStartIdx;
- bool Repl32;
+ unsigned RLAmt = std::numeric_limits<unsigned>::max();
+ unsigned NumGroups = 0;
+ unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
+ bool Repl32 = false;
- ValueRotInfo()
- : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
- Repl32(false) {}
+ ValueRotInfo() = default;
// For sorting (in reverse order) by NumGroups, and then by
// FirstGroupStartIdx.
@@ -1985,7 +2011,8 @@ public:
return RNLM;
}
};
-} // anonymous namespace
+
+} // end anonymous namespace
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
if (N->getValueType(0) != MVT::i32 &&
@@ -2450,7 +2477,6 @@ void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
}
-
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -2474,19 +2500,18 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
- case ISD::Constant: {
+ case ISD::Constant:
if (N->getValueType(0) == MVT::i64) {
ReplaceNode(N, getInt64(CurDAG, N));
return;
}
break;
- }
- case ISD::SETCC: {
+ case ISD::SETCC:
if (trySETCC(N))
return;
break;
- }
+
case PPCISD::GlobalBaseReg:
ReplaceNode(N, getGlobalBaseReg());
return;
@@ -2502,11 +2527,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- case PPCISD::READ_TIME_BASE: {
+ case PPCISD::READ_TIME_BASE:
ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
MVT::Other, N->getOperand(0)));
return;
- }
case PPCISD::SRA_ADDZE: {
SDValue N0 = N->getOperand(0);
@@ -2690,6 +2714,19 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
return;
}
+    // If this is a negated 64-bit zero-extension mask, i.e. the immediate is
+    // a sequence of ones from the most significant side and all zeros for the
+    // remainder, we should use rldicr.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(~Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 63 - countTrailingOnes(~Imm64);
+ SH = 0;
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return;
+ }
+
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
@@ -2911,8 +2948,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
return;
}
-
break;
+
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -2940,7 +2977,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops);
+ SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
return;
}
}
@@ -3088,7 +3129,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue(Tmp, 0), GA));
return;
}
- case PPCISD::PPC32_PICGOT: {
+ case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
@@ -3096,7 +3137,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
MVT::i32);
return;
- }
+
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
@@ -3139,7 +3180,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TmpVal = SDValue(Tmp, 0);
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
return;
-
} else if (Elt > 0) {
// Elt is odd and positive, in the range [17,31].
//
@@ -3154,7 +3194,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0)));
return;
-
} else {
// Elt is odd and negative, in the range [-31,-17].
//
@@ -3199,7 +3238,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue RHS, LHS;
- bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ bool BytesFound[8] = {false, false, false, false, false, false, false, false};
uint64_t Mask = 0, Alt = 0;
auto IsByteSelectCC = [this](SDValue O, unsigned &b,
@@ -3499,7 +3538,6 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
-
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
@@ -3515,10 +3553,6 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
// be folded with the isel so that we don't need to materialize a register
// containing zero.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
- // If we're not using isel, then this does not matter.
- if (!PPCSubTarget->hasISEL())
- return false;
-
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
@@ -4520,7 +4554,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
}
}
-
/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
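The new rldicr selection added in this file keys off 64-bit AND masks that are all ones from the most significant bit downward. The MB computation can be sanity-checked in isolation; the helpers below re-model isMask_64 and countTrailingOnes with compiler builtins rather than the llvm/Support/MathExtras.h versions:

#include <cassert>
#include <cstdint>

// True when V has ones only in its low bits, e.g. 0x00000000FFFFFFFF.
static bool isMask64(uint64_t V) { return V && ((V + 1) & V) == 0; }

static unsigned countTrailingOnes64(uint64_t V) {
  return V == ~0ULL ? 64 : __builtin_ctzll(~V);
}

// rldicr Val, SH=0, MB keeps bits 0..MB in IBM numbering (bit 0 is the MSB),
// i.e. the (MB + 1) most significant bits, and clears the rest.
static uint64_t rldicrSh0(uint64_t Val, unsigned MB) {
  return Val & (~0ULL << (63 - MB));
}

int main() {
  const uint64_t Imm64 = 0xFFFFFFFF00000000ULL; // negated zero-extension mask
  const uint64_t Val = 0x0123456789ABCDEFULL;
  assert(isMask64(~Imm64));                       // qualifies for the new pattern
  unsigned MB = 63 - countTrailingOnes64(~Imm64); // 31 for this mask
  assert(rldicrSh0(Val, MB) == (Val & Imm64));    // the AND folds into one rldicr
  return 0;
}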
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2b9195b095e1..f7663d8e5185 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11,39 +11,88 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCCCState.h"
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
+#include "PPCRegisterInfo.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
-#include "PPCTargetObjectFile.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
#include <list>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -1525,7 +1574,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
-
// Check that the mask is shuffling words
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
@@ -1643,7 +1691,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// If the element isn't a constant, bail fully out.
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
-
if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
@@ -2026,7 +2073,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
}
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
-
// Common code will reject creating a pre-inc form if the base pointer
// is a frame index, or if N is a store and the base pointer is either
// the same as or a predecessor of the value being stored. Check for
@@ -2277,7 +2323,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
-
// FIXME: TLS addresses currently use medium model code sequences,
// which is the most useful form. Eventually support for small and
// large models could be added if users need it, at the cost of
@@ -2602,10 +2647,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- std::move(Args));
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -2737,7 +2781,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-bool
+bool
llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -2752,7 +2796,7 @@ llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
int RegsLeft = NumArgRegs - RegNum;
- // Skip if there is not enough registers left for long double type (4 gpr regs
+  // Skip if there are not enough registers left for long double type (4 gpr regs
// in soft float mode) and put long double argument on the stack.
if (RegNum != NumArgRegs && RegsLeft < 4) {
for (int i = 0; i < RegsLeft; i++) {
@@ -4066,7 +4110,7 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
- if (CS->arg_size() != CallerFn->getArgumentList().size())
+ if (CS->arg_size() != CallerFn->arg_size())
return false;
ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
@@ -4222,11 +4266,12 @@ namespace {
struct TailCallArgumentInfo {
SDValue Arg;
SDValue FrameIdxOp;
- int FrameIdx;
+ int FrameIdx = 0;
- TailCallArgumentInfo() : FrameIdx(0) {}
+ TailCallArgumentInfo() = default;
};
-}
+
+} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
@@ -4406,7 +4451,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
-
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
@@ -4602,7 +4646,6 @@ SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -4649,7 +4692,6 @@ SDValue PPCTargetLowering::FinishCall(
SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
-
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
@@ -5059,7 +5101,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
@@ -5105,10 +5146,30 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
};
const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
+ const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = array_lengthof(VR);
const unsigned NumQFPRs = NumFPRs;
+ // On ELFv2, we can avoid allocating the parameter area if all the arguments
+ // can be passed to the callee in registers.
+ // For the fast calling convention, there is another check below.
+  // Note: We should keep this consistent with LowerFormalArguments_64SVR4().
+  bool HasParameterArea =
+      !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
+ if (!HasParameterArea) {
+ unsigned ParamAreaSize = NumGPRs * PtrByteSize;
+ unsigned AvailableFPRs = NumFPRs;
+ unsigned AvailableVRs = NumVRs;
+ unsigned NumBytesTmp = NumBytes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Outs[i].Flags.isNest()) continue;
+ if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytesTmp, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
+ HasParameterArea = true;
+ }
+ }
+
// When using the fast calling convention, we don't provide backing for
// arguments that will be in registers.
unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
@@ -5176,13 +5237,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
unsigned NumBytesActuallyUsed = NumBytes;
- // The prolog code of the callee may store up to 8 GPR argument registers to
+ // In the old ELFv1 ABI,
+ // the prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if its varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
- NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+  // In the ELFv2 ABI, we allocate the parameter area iff the callee
+  // actually requires memory operands, e.g. for a varargs call.
+ if (HasParameterArea)
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+ else
+ NumBytes = LinkageSize;
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -5401,6 +5467,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5486,6 +5554,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5520,6 +5590,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// GPRs when within range. For now, we always put the value in both
// locations (or even all three).
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5552,6 +5624,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5572,6 +5646,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5604,6 +5680,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5618,7 +5696,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
}
}
- assert(NumBytesActuallyUsed == ArgOffset);
+ assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
+ "mismatch in size of parameter area");
(void)NumBytesActuallyUsed;
if (!MemOpChains.empty())
@@ -5673,7 +5752,6 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
unsigned NumOps = Outs.size();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -6065,7 +6143,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -7612,7 +7689,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
}
-
}
if (Subtarget.hasQPX()) {
@@ -7792,24 +7868,39 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
- cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
isDot = false;
switch (IntrinsicID) {
- default: return false;
- // Comparison predicates.
- case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ default:
+ return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ CompareOpc = 966;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ CompareOpc = 198;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ CompareOpc = 6;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ CompareOpc = 70;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ CompareOpc = 134;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 199;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb_p:
case Intrinsic::ppc_altivec_vcmpneh_p:
@@ -7818,45 +7909,80 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezh_p:
case Intrinsic::ppc_altivec_vcmpnezw_p:
if (Subtarget.hasP9Altivec()) {
- switch(IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb_p:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh_p:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew_p:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb_p:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh_p:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw_p:
+ CompareOpc = 391;
+ break;
}
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ CompareOpc = 454;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ CompareOpc = 710;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ CompareOpc = 774;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ CompareOpc = 838;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ CompareOpc = 902;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 967;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ CompareOpc = 518;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ CompareOpc = 582;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ CompareOpc = 646;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 711;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- // VSX predicate comparisons use the same infrastructure
+
+ // VSX predicate comparisons use the same infrastructure
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
case Intrinsic::ppc_vsx_xvcmpgedp_p:
case Intrinsic::ppc_vsx_xvcmpgtdp_p:
@@ -7865,33 +7991,51 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_vsx_xvcmpgtsp_p:
if (Subtarget.hasVSX()) {
switch (IntrinsicID) {
- case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
- case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
- case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
- case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
- case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
- case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p:
+ CompareOpc = 99;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgedp_p:
+ CompareOpc = 115;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p:
+ CompareOpc = 107;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p:
+ CompareOpc = 67;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgesp_p:
+ CompareOpc = 83;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p:
+ CompareOpc = 75;
+ break;
}
- isDot = 1;
- }
- else
+ isDot = true;
+ } else
return false;
-
break;
- // Normal Comparisons.
- case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp:
+ CompareOpc = 966;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp:
+ CompareOpc = 198;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb:
+ CompareOpc = 6;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh:
+ CompareOpc = 70;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw:
+ CompareOpc = 134;
+ break;
case Intrinsic::ppc_altivec_vcmpequd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 199;
- isDot = 0;
- } else
+ else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb:
case Intrinsic::ppc_altivec_vcmpneh:
@@ -7899,43 +8043,67 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezb:
case Intrinsic::ppc_altivec_vcmpnezh:
case Intrinsic::ppc_altivec_vcmpnezw:
- if (Subtarget.hasP9Altivec()) {
+ if (Subtarget.hasP9Altivec())
switch (IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw:
+ CompareOpc = 391;
+ break;
}
- isDot = 0;
- } else
+ else
return false;
break;
- case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp:
+ CompareOpc = 454;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp:
+ CompareOpc = 710;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb:
+ CompareOpc = 774;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh:
+ CompareOpc = 838;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw:
+ CompareOpc = 902;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 967;
- isDot = 0;
- } else
+ else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub:
+ CompareOpc = 518;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh:
+ CompareOpc = 582;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw:
+ CompareOpc = 646;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 711;
- isDot = 0;
- } else
+ else
return false;
-
break;
}
return true;
@@ -8044,7 +8212,7 @@ SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
@@ -9174,10 +9342,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
- if (Subtarget.hasISEL() &&
- (MI.getOpcode() == PPC::SELECT_CC_I4 ||
+ if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
+ MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -9417,7 +9584,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0);
-
else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
(Subtarget.hasPartwordAtomics() &&
@@ -10028,14 +10194,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
-
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// comparison is kept in a GPR instead of CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
bool Swap, SDLoc &DL, SelectionDAG &DAG) {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
// Zero extend the operands to the largest legal integer. Originally, they
@@ -10068,7 +10232,6 @@ static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
DAGCombinerInfo &DCI) const {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
SelectionDAG &DAG = DCI.DAG;
@@ -11227,9 +11390,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+      // If the type of the BSWAP operand is wider than the stored memory
+      // width, it needs to be shifted right before the STBRX.
+ EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
+ if (Op1VT.bitsGT(mVT)) {
+ int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
+ BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
+ DAG.getConstant(Shift, dl, MVT::i32));
+ // Need to truncate if this is a bswap of i64 stored as i32/i16.
+ if (Op1VT == MVT::i64)
+ BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
+ }
+
SDValue Ops[] = {
- N->getOperand(0), BSwapOp, N->getOperand(2),
- DAG.getValueType(N->getOperand(1).getValueType())
+ N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
@@ -11570,7 +11744,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
- case ISD::INTRINSIC_W_CHAIN: {
+ case ISD::INTRINSIC_W_CHAIN:
// For little endian, VSX loads require generating lxvd2x/xxswapd.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11583,8 +11757,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
- case ISD::INTRINSIC_VOID: {
+ case ISD::INTRINSIC_VOID:
// For little endian, VSX stores require generating xxswapd/stxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11597,7 +11770,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -11635,9 +11807,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
-
break;
- case PPCISD::VCMP: {
+ case PPCISD::VCMP:
// If a VCMPo node already exists with exactly the same operands as this
// node, use its result instead of this node (VCMPo computes both a CR6 and
// a normal output).
@@ -11687,7 +11858,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue(VCMPoNode, 0);
}
break;
- }
case ISD::BRCOND: {
SDValue Cond = N->getOperand(1);
SDValue Target = N->getOperand(2);
@@ -11847,6 +12017,7 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
@@ -12295,7 +12466,6 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
-
switch (Intrinsic) {
case Intrinsic::ppc_qpx_qvlfd:
case Intrinsic::ppc_qpx_qvlfs:
@@ -12753,7 +12923,6 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
}
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-
if (!VT.isSimple() || !Subtarget.hasVSX())
return false;
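The STORE+BSWAP combine added above shifts a BSWAP operand that is wider than the stored memory width before forming the byte-reversed store (STBRX). The identity it relies on can be checked in isolation, with compiler builtins standing in for the DAG nodes:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t X = 0x0123456789ABCDEFULL;

  // Storing the low 32 bits of bswap64(X) is the same as byte-reverse-storing
  // X >> 32: the shift moves the bytes that actually reach memory into the
  // narrow value handed to STBRX.
  assert(static_cast<uint32_t>(__builtin_bswap64(X)) ==
         __builtin_bswap32(static_cast<uint32_t>(X >> 32)));

  // The same identity for a 16-bit store width (shift amount 64 - 16 = 48).
  assert(static_cast<uint16_t>(__builtin_bswap64(X)) ==
         __builtin_bswap16(static_cast<uint16_t>(X >> 48)));
  return 0;
}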
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 05acd25ae5fc..6113eb58f421 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -17,13 +17,26 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
-#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
#include "llvm/Target/TargetLowering.h"
+#include <utility>
namespace llvm {
+
namespace PPCISD {
+
enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -398,10 +411,12 @@ namespace llvm {
/// the last operand.
TOC_ENTRY
};
- }
+
+ } // end namespace PPCISD
/// Define some predicates that are used for node matching.
namespace PPC {
+
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
@@ -465,7 +480,8 @@ namespace llvm {
/// If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int isQVALIGNIShuffleMask(SDNode *N);
- }
+
+ } // end namespace PPC
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
@@ -492,6 +508,7 @@ namespace llvm {
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+
bool useSoftFloat() const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
@@ -514,6 +531,10 @@ namespace llvm {
return true;
}
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
bool supportSplitCSR(MachineFunction *MF) const override {
return
MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
@@ -587,6 +608,7 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
@@ -694,6 +716,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool convertSelectOfConstantsToMath() const override {
+ return true;
+ }
+
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -785,15 +811,13 @@ namespace llvm {
SDValue Chain;
SDValue ResChain;
MachinePointerInfo MPI;
- bool IsDereferenceable;
- bool IsInvariant;
- unsigned Alignment;
+ bool IsDereferenceable = false;
+ bool IsInvariant = false;
+ unsigned Alignment = 0;
AAMDNodes AAInfo;
- const MDNode *Ranges;
+ const MDNode *Ranges = nullptr;
- ReuseLoadInfo()
- : IsDereferenceable(false), IsInvariant(false), Alignment(0),
- Ranges(nullptr) {}
+ ReuseLoadInfo() = default;
MachineMemOperand::Flags MMOFlags() const {
MachineMemOperand::Flags F = MachineMemOperand::MONone;
@@ -906,15 +930,13 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
- bool
- CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -994,14 +1016,16 @@ namespace llvm {
CCAssignFn *useFastISelCCs(unsigned Flag) const;
SDValue
- combineElementTruncationToVectorTruncation(SDNode *N,
- DAGCombinerInfo &DCI) const;
+ combineElementTruncationToVectorTruncation(SDNode *N,
+ DAGCombinerInfo &DCI) const;
};
namespace PPC {
+
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo);
- }
+
+ } // end namespace PPC
bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -1026,6 +1050,7 @@ namespace llvm {
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-}
-#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+} // end namespace llvm
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
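Among the hooks added in this header, convertSelectOfConstantsToMath() tells the generic combiner it may rewrite a select between two constants as arithmetic on the condition bit instead of a conditional move. A hand-worked instance of that kind of rewrite; the exact form DAGCombiner chooses may differ, this only illustrates the equivalence:

#include <cassert>
#include <cstdint>

// select(Cond, 7, 4) rewritten as 4 + zext(Cond) * (7 - 4): pure math on the
// i1 condition, no branch or conditional move required.
static int64_t selectAsMath(bool Cond) {
  return 4 + static_cast<int64_t>(Cond) * (7 - 4);
}

int main() {
  assert(selectAsMath(true) == 7);
  assert(selectAsMath(false) == 4);
  return 0;
}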
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index fbec8787ef8d..997b96ca6ec8 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -253,11 +253,11 @@ def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
Requires<[IsISA3_0]>;
}
-let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
+let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
"stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
-let mayStore = 1, hasSideEffects = 0 in
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
"stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64,
Requires<[IsISA3_0]>;
@@ -1082,7 +1082,7 @@ def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
}
// Stores with Update (pre-inc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
"stbu $rS, $dst", IIC_LdStStoreUpd, []>,
@@ -1232,6 +1232,10 @@ def : Pat<(srl i64:$rS, i32:$rB),
def : Pat<(shl i64:$rS, i32:$rB),
(SLD $rS, $rB)>;
+// SUBFIC
+def : Pat<(sub imm64SExt16:$imm, i64:$in),
+ (SUBFIC8 $in, imm:$imm)>;
+
// SHL/SRL
def : Pat<(shl i64:$in, (i32 imm:$imm)),
(RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 5c022749ad64..c380766e9f5c 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -407,7 +407,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -434,7 +434,7 @@ def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
[(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
-let PPC970_Unit = 2 in { // Stores.
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
"stvebx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
@@ -851,6 +851,10 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
// Additional Altivec Patterns
//
+// Extended mnemonics
+def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
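The vmr/vnot extended mnemonics added above rest on VOR and VNOR degenerating into a register move and a bitwise NOT when both sources are the same register. The underlying scalar identities, which each vector lane obeys in the same way:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 0xDEADBEEFu;
  assert((A | A) == A);   // vmr  vD, vA  is  vor  vD, vA, vA
  assert(~(A | A) == ~A); // vnot vD, vA  is  vnor vD, vA, vA
  return 0;
}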
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2e0b9355f82b..8e159f47ea2e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -65,7 +65,9 @@ UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
- : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
+ /* CatchRetOpcode */ -1,
+ STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
@@ -662,12 +664,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
- BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
- BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ .addImm(Cond[0].getImm())
+ .add(Cond[1])
+ .addMBB(TBB);
return 1;
}
@@ -677,12 +681,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
- BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
- BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
else
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ .addImm(Cond[0].getImm())
+ .add(Cond[1])
+ .addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
@@ -692,9 +698,6 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
- if (!Subtarget.hasISEL())
- return false;
-
if (Cond.size() != 2)
return false;
@@ -736,9 +739,6 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
- assert(Subtarget.hasISEL() &&
- "Cannot insert select on target without ISEL support");
-
// Get the register classes.
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
@@ -1493,7 +1493,7 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
return Found;
}
-bool PPCInstrInfo::isPredicable(MachineInstr &MI) const {
+bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const {
unsigned OpC = MI.getOpcode();
switch (OpC) {
default:
@@ -1836,8 +1836,7 @@ unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
PatchPointOpers Opers(&MI);
return Opers.getNumPatchBytes();
} else {
- const MCInstrDesc &Desc = get(Opcode);
- return Desc.getSize();
+ return get(Opcode).getSize();
}
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 32b2f009a3f5..f11aed8fa268 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -253,7 +253,7 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
// Comparison optimization.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f615cc7cc974..f004ce49cac0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -114,9 +114,9 @@ def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
[SDNPHasChain, SDNPMayLoad]>;
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
@@ -243,7 +243,7 @@ def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
@@ -770,9 +770,10 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
}
// A single-register address. This is used with the SjLj
-// pseudo-instructions.
+// pseudo-instructions, which translate to LD/LWZ. These instructions require
+// G8RC_NOX0 registers.
def memr : Operand<iPTR> {
- let MIOperandInfo = (ops ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
}
def PPCTLSRegOperand : AsmOperandClass {
let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
@@ -1648,7 +1649,7 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
"lbarx $rD, $src", IIC_LdStLWARX, []>,
Requires<[HasPartwordAtomics]>;
@@ -1681,7 +1682,7 @@ def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC),
Requires<[IsISA3_0]>;
}
-let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
"stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
isDOT, Requires<[HasPartwordAtomics]>;
@@ -1694,7 +1695,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
"stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
}
-let mayStore = 1, hasSideEffects = 0 in
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC),
"stwat $rS, $rA, $FC", IIC_LdStStore>,
Requires<[IsISA3_0]>;
@@ -1740,7 +1741,7 @@ def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
@@ -1813,7 +1814,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let PPC970_Unit = 2 in {
+let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -1827,8 +1828,6 @@ def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
"lwzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
-
-
def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
"lhbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
@@ -1860,7 +1859,7 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
//
// Unindexed (r+i) Stores.
-let PPC970_Unit = 2 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
"stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i32:$rS, iaddr:$src)]>;
@@ -1879,7 +1878,7 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"stbu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
@@ -1948,7 +1947,7 @@ def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
}
// Indexed (r+r) Stores with Update (preinc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"stbux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 0d9e3459f47e..13603732397a 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -62,7 +62,7 @@ def SDTVecConv : SDTypeProfile<1, 2, [
]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
@@ -117,7 +117,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
@@ -142,7 +142,7 @@ let Uses = [RM] in {
} // mayLoad
// Store indexed instructions
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSDX : XX1Form<31, 716,
(outs), (ins vsfrc:$XT, memrr:$dst),
@@ -1197,7 +1197,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD,
@@ -1211,7 +1211,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD,
@@ -1410,6 +1410,11 @@ let Predicates = [HasDirectMove] in {
"mfvsrd $rA, $XT", IIC_VecGeneral,
[(set i64:$rA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in
+ def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ []>,
+ Requires<[In64BitMode]>;
def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[(set i32:$rA, (PPCmfvsr f64:$XT))]>;
@@ -1440,6 +1445,13 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
} // IsISA3_0, HasDirectMove
} // UseVSXReg = 1
+// We want to parse this from asm, but we don't want to emit this as it would
+// be emitted with a VSX reg. So leave Emit = 0 here.
+def : InstAlias<"mfvrd $rA, $XT",
+ (MFVRD g8rc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprd $rA, $src",
+ (MFVSRD g8rc:$rA, f8rc:$src)>;
+
/* Direct moves of various widths from GPR's into VSR's. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
a doubleword are shifted left and moved for BE. For LE, they're moved, then
@@ -2186,7 +2198,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // UseVSXReg = 1
// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
- // seperate pattern so that it can convert the input register class from
+ // separate pattern so that it can convert the input register class from
// VRRC(v8i16) to VSRC.
def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
(v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
@@ -2335,7 +2347,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
"lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
@@ -2383,7 +2395,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
"stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 2c3e75523e8f..a349fa1b4090 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -72,9 +73,10 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) {
+ PPCLoopPreIncPrep() : FunctionPass(ID) {
initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
}
+
PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
}
@@ -93,7 +95,7 @@ namespace {
bool rotateLoop(Loop *L);
private:
- PPCTargetMachine *TM;
+ PPCTargetMachine *TM = nullptr;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index e527b018d4fb..541b98e01b99 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -148,7 +148,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 2413af3f7042..c6d2c3ebcc0f 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -147,9 +147,9 @@ bool PPCMIPeephole::simplifyCode(void) {
<< "Optimizing load-and-splat/splat "
"to load-and-splat/copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -169,9 +169,9 @@ bool PPCMIPeephole::simplifyCode(void) {
<< "Optimizing splat/swap or splat/splat "
"to splat/copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -194,9 +194,9 @@ bool PPCMIPeephole::simplifyCode(void) {
else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(DefMI->getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(DefMI->getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -251,7 +251,7 @@ bool PPCMIPeephole::simplifyCode(void) {
DEBUG(MI.dump());
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(OpNo));
+ .add(MI.getOperand(OpNo));
ToErase = &MI;
Simplified = true;
}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 9d91e31165de..bc2d9a08b5e8 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,14 +8,13 @@
//===----------------------------------------------------------------------===//
#include "PPCMachineFunctionInfo.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-void PPCFunctionInfo::anchor() { }
+void PPCFunctionInfo::anchor() {}
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
const DataLayout &DL = MF.getDataLayout();
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 4c29aa06f048..202e10058b73 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -26,17 +27,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// FramePointerSaveIndex - Frame index of where the old frame pointer is
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
- int FramePointerSaveIndex;
+ int FramePointerSaveIndex = 0;
/// ReturnAddrSaveIndex - Frame index of where the return address is stored.
///
- int ReturnAddrSaveIndex;
+ int ReturnAddrSaveIndex = 0;
/// Frame index where the old base pointer is stored.
- int BasePointerSaveIndex;
+ int BasePointerSaveIndex = 0;
/// Frame index where the old PIC base pointer is stored.
- int PICBasePointerSaveIndex;
+ int PICBasePointerSaveIndex = 0;
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
@@ -44,54 +45,58 @@ class PPCFunctionInfo : public MachineFunctionInfo {
bool MustSaveLR;
/// Does this function have any stack spills.
- bool HasSpills;
+ bool HasSpills = false;
/// Does this function spill using instructions with only r+r (not r+i)
/// forms.
- bool HasNonRISpills;
+ bool HasNonRISpills = false;
/// SpillsCR - Indicates whether CR is spilled in the current function.
- bool SpillsCR;
+ bool SpillsCR = false;
/// Indicates whether VRSAVE is spilled in the current function.
- bool SpillsVRSAVE;
+ bool SpillsVRSAVE = false;
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
/// entry, even though LR may otherwise apparently not be used.
- bool LRStoreRequired;
+ bool LRStoreRequired = false;
/// This function makes use of the PPC64 ELF TOC base pointer (register r2).
- bool UsesTOCBasePtr;
+ bool UsesTOCBasePtr = false;
/// MinReservedArea - This is the frame size that is at least reserved in a
/// potential caller (parameter+linkage area).
- unsigned MinReservedArea;
+ unsigned MinReservedArea = 0;
/// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
/// amount the stack pointer is adjusted to make the frame bigger for tail
/// calls. Used for creating an area before the register spill area.
- int TailCallSPDelta;
+ int TailCallSPDelta = 0;
/// HasFastCall - Does this function contain a fast call. Used to determine
/// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
- bool HasFastCall;
+ bool HasFastCall = false;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
+
/// VarArgsStackOffset - StackOffset for start of stack
/// arguments.
- int VarArgsStackOffset;
+
+ int VarArgsStackOffset = 0;
+
/// VarArgsNumGPR - Index of the first unused integer
/// register for parameter passing.
- unsigned VarArgsNumGPR;
+ unsigned VarArgsNumGPR = 0;
+
/// VarArgsNumFPR - Index of the first unused double
/// register for parameter passing.
- unsigned VarArgsNumFPR;
+ unsigned VarArgsNumFPR = 0;
/// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
- int CRSpillFrameIndex;
+ int CRSpillFrameIndex = 0;
/// If any of CR[2-4] need to be saved in the prologue and restored in the
/// epilogue then they are added to this array. This is used for the
@@ -102,35 +107,14 @@ class PPCFunctionInfo : public MachineFunctionInfo {
MachineFunction &MF;
/// Whether this uses the PIC Base register or not.
- bool UsesPICBase;
+ bool UsesPICBase = false;
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
public:
- explicit PPCFunctionInfo(MachineFunction &MF)
- : FramePointerSaveIndex(0),
- ReturnAddrSaveIndex(0),
- BasePointerSaveIndex(0),
- PICBasePointerSaveIndex(0),
- HasSpills(false),
- HasNonRISpills(false),
- SpillsCR(false),
- SpillsVRSAVE(false),
- LRStoreRequired(false),
- UsesTOCBasePtr(false),
- MinReservedArea(0),
- TailCallSPDelta(0),
- HasFastCall(false),
- VarArgsFrameIndex(0),
- VarArgsStackOffset(0),
- VarArgsNumGPR(0),
- VarArgsNumFPR(0),
- CRSpillFrameIndex(0),
- MF(MF),
- UsesPICBase(0),
- IsSplitCSR(false) {}
+ explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -211,7 +195,6 @@ public:
MCSymbol *getTOCOffsetSymbol() const;
};
-} // end of namespace llvm
-
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e49201402861..aad913924692 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -209,86 +209,67 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
- Reserved.set(PPC::ZERO);
- Reserved.set(PPC::ZERO8);
+ markSuperRegs(Reserved, PPC::ZERO);
// The FP register is also not really a register, but is the representation
// of the frame pointer register used by ISD::FRAMEADDR.
- Reserved.set(PPC::FP);
- Reserved.set(PPC::FP8);
+ markSuperRegs(Reserved, PPC::FP);
// The BP register is also not really a register, but is the representation
// of the base pointer register used by setjmp.
- Reserved.set(PPC::BP);
- Reserved.set(PPC::BP8);
+ markSuperRegs(Reserved, PPC::BP);
// The counter registers must be reserved so that counter-based loops can
// be correctly formed (and the mtctr instructions are not DCE'd).
- Reserved.set(PPC::CTR);
- Reserved.set(PPC::CTR8);
+ markSuperRegs(Reserved, PPC::CTR);
+ markSuperRegs(Reserved, PPC::CTR8);
- Reserved.set(PPC::R1);
- Reserved.set(PPC::LR);
- Reserved.set(PPC::LR8);
- Reserved.set(PPC::RM);
+ markSuperRegs(Reserved, PPC::R1);
+ markSuperRegs(Reserved, PPC::LR);
+ markSuperRegs(Reserved, PPC::LR8);
+ markSuperRegs(Reserved, PPC::RM);
if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
- Reserved.set(PPC::VRSAVE);
+ markSuperRegs(Reserved, PPC::VRSAVE);
// The SVR4 ABI reserves r2 and r13
if (Subtarget.isSVR4ABI()) {
- Reserved.set(PPC::R2); // System-reserved register
- Reserved.set(PPC::R13); // Small Data Area pointer register
+ // We only reserve r2 if we need to use the TOC pointer. If we have no
+ // explicit uses of the TOC pointer (meaning we're a leaf function with
+ // no constant-pool loads, etc.) and we have no potential uses inside an
+ // inline asm block, then we can treat r2 as an ordinary callee-saved
+ // register.
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ if (!TM.isPPC64() || FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
+ markSuperRegs(Reserved, PPC::R2); // System-reserved register
+ markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (TM.isPPC64()) {
- Reserved.set(PPC::R13);
-
- Reserved.set(PPC::X1);
- Reserved.set(PPC::X13);
-
- if (TFI->needsFP(MF))
- Reserved.set(PPC::X31);
-
- if (hasBasePointer(MF))
- Reserved.set(PPC::X30);
-
- // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
- if (Subtarget.isSVR4ABI()) {
- // We only reserve r2 if we need to use the TOC pointer. If we have no
- // explicit uses of the TOC pointer (meaning we're a leaf function with
- // no constant-pool loads, etc.) and we have no potential uses inside an
- // inline asm block, then we can treat r2 has an ordinary callee-saved
- // register.
- const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- if (FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
- Reserved.set(PPC::X2);
- else
- Reserved.reset(PPC::R2);
- }
- }
+ if (TM.isPPC64())
+ markSuperRegs(Reserved, PPC::R13);
if (TFI->needsFP(MF))
- Reserved.set(PPC::R31);
+ markSuperRegs(Reserved, PPC::R31);
bool IsPositionIndependent = TM.isPositionIndependent();
if (hasBasePointer(MF)) {
if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
- Reserved.set(PPC::R29);
+ markSuperRegs(Reserved, PPC::R29);
else
- Reserved.set(PPC::R30);
+ markSuperRegs(Reserved, PPC::R30);
}
if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
- Reserved.set(PPC::R30);
+ markSuperRegs(Reserved, PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
if (!Subtarget.hasAltivec())
for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(),
IE = PPC::VRRCRegClass.end(); I != IE; ++I)
- Reserved.set(*I);
+ markSuperRegs(Reserved, *I);
+ assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td
index 8e52da583a0d..79963dd6a3e9 100644
--- a/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -377,7 +377,7 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_FPU1, P8_FPU2]>],
[7, 1, 1]>,
InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
- InstrStage<1, [P8_FPU2, P8_FPU2]>],
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
[3, 1, 1]>
]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index e8a87e7f4437..ccf0f80c336b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -220,8 +220,8 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
-unsigned char PPCSubtarget::classifyGlobalReference(
- const GlobalValue *GV) const {
+unsigned char
+PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const {
// Note that currently we don't generate non-pic references.
// If a caller wants that, this will have to be updated.
@@ -229,23 +229,9 @@ unsigned char PPCSubtarget::classifyGlobalReference(
if (TM.getCodeModel() == CodeModel::Large)
return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
- unsigned char flags = PPCII::MO_PIC_FLAG;
-
- // Only if the relocation mode is PIC do we have to worry about
- // interposition. In all other cases we can use a slightly looser standard to
- // decide how to access the symbol.
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // If it's local, or it's non-default, it can't be interposed.
- if (!GV->hasLocalLinkage() &&
- GV->hasDefaultVisibility()) {
- flags |= PPCII::MO_NLP_FLAG;
- }
- return flags;
- }
-
- if (GV->isStrongDefinitionForLinker())
- return flags;
- return flags | PPCII::MO_NLP_FLAG;
+ if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return PPCII::MO_PIC_FLAG;
+ return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
}
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 7fd907990ceb..5a97f595ad8c 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -298,7 +298,9 @@ public:
bool isSVR4ABI() const { return !isDarwinABI(); }
bool isELFv2ABI() const;
- bool enableEarlyIfConversion() const override { return hasISEL(); }
+ /// Originally, this function returned hasISEL(). Now we always enable it,
+ /// but may expand the ISEL instruction later.
+ bool enableEarlyIfConversion() const override { return true; }
// Scheduling customization.
bool enableMachineScheduler() const override;
@@ -316,6 +318,8 @@ public:
/// classifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it.
unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+
+ bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 91b1d24b2e41..7806d45b5457 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,21 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
+#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::
@@ -80,6 +92,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
+ initializePPCExpandISELPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -149,9 +162,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
// If it isn't a Mach-O file then it's going to be a linux ELF
// object file.
if (TT.isOSDarwin())
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
- return make_unique<PPC64LinuxTargetObjectFile>();
+ return llvm::make_unique<PPC64LinuxTargetObjectFile>();
}
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
@@ -205,15 +218,13 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM), CM, OL),
TLOF(createTLOF(getTargetTriple())),
- TargetABI(computeTargetABI(TT, Options)),
- Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
-
+ TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
}
-PPCTargetMachine::~PPCTargetMachine() {}
+PPCTargetMachine::~PPCTargetMachine() = default;
-void PPC32TargetMachine::anchor() { }
+void PPC32TargetMachine::anchor() {}
PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -223,7 +234,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
-void PPC64TargetMachine::anchor() { }
+void PPC64TargetMachine::anchor() {}
PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -281,6 +292,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
//===----------------------------------------------------------------------===//
namespace {
+
/// PPC Code Generator Pass Configuration Options.
class PPCPassConfig : public TargetPassConfig {
public:
@@ -300,7 +312,8 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
@@ -416,6 +429,8 @@ void PPCPassConfig::addPreSched2() {
}
void PPCPassConfig::addPreEmitPass() {
+ addPass(createPPCExpandISELPass());
+
if (getOptLevel() != CodeGenOpt::None)
addPass(createPPCEarlyReturnPass(), false);
// Must run branch selection immediately preceding the asm printer.
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 59b4f1e30c0e..f2838351cee5 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,7 +29,6 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
- PPCSubtarget Subtarget;
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index dbe7617d3542..310fea9ef09f 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -1,4 +1,4 @@
-//===-- PPCTargetStreamer.h - PPC Target Streamer --s-----------*- C++ -*--===//
+//===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,18 +10,26 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCStreamer.h"
namespace llvm {
+
+class MCExpr;
+class MCSymbol;
+class MCSymbolELF;
+
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
~PPCTargetStreamer() override;
+
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f94d1eab097d..7ee1317bf72f 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -302,14 +302,16 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first;
}
-int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -352,7 +354,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -401,6 +403,10 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
if (IsVSXType || (ST->hasVSX() && IsAltivecType))
return Cost;
+ // Newer PPC supports unaligned memory access.
+ if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
+ return Cost;
+
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 30ee2814aba1..6ce70fbd8778 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -74,11 +74,13 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 3b5d8f094fd0..f3a0290da054 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -112,7 +112,7 @@ protected:
TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
.addImm(1) // add 1, not 0, because there is no implicit clearing
// of the high bits.
- .addOperand(SrcMO)
+ .add(SrcMO)
.addImm(PPC::sub_64);
// The source of the original copy is now the new virtual register.
@@ -132,7 +132,7 @@ protected:
unsigned NewVReg = MRI.createVirtualRegister(DstRC);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg)
- .addOperand(SrcMO);
+ .add(SrcMO);
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 8197285b7b1f..d3434b77be8a 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -522,7 +522,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (RelevantFunction) {
DEBUG(dbgs() << "Swap vector when first built\n\n");
- dumpSwapVector();
+ DEBUG(dumpSwapVector());
}
return RelevantFunction;
@@ -731,7 +731,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
}
DEBUG(dbgs() << "Swap vector after web analysis:\n\n");
- dumpSwapVector();
+ DEBUG(dumpSwapVector());
}
// Walk the swap vector entries looking for swaps fed by permuting loads
@@ -936,9 +936,9 @@ bool PPCVSXSwapRemoval::removeSwaps() {
Changed = true;
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
MachineBasicBlock *MBB = MI->getParent();
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
- .addOperand(MI->getOperand(1));
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .add(MI->getOperand(1));
DEBUG(dbgs() << format("Replaced %d with copy: ",
SwapVector[EntryIdx].VSEId));
@@ -951,77 +951,78 @@ bool PPCVSXSwapRemoval::removeSwaps() {
return Changed;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// For debug purposes, dump the contents of the swap vector.
-void PPCVSXSwapRemoval::dumpSwapVector() {
+LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() {
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
int ID = SwapVector[EntryIdx].VSEId;
- DEBUG(dbgs() << format("%6d", ID));
- DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID)));
- DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber()));
- DEBUG(dbgs() << format(" %14s ",
- TII->getName(MI->getOpcode()).str().c_str()));
+ dbgs() << format("%6d", ID);
+ dbgs() << format("%6d", EC->getLeaderValue(ID));
+ dbgs() << format(" BB#%3d", MI->getParent()->getNumber());
+ dbgs() << format(" %14s ", TII->getName(MI->getOpcode()).str().c_str());
if (SwapVector[EntryIdx].IsLoad)
- DEBUG(dbgs() << "load ");
+ dbgs() << "load ";
if (SwapVector[EntryIdx].IsStore)
- DEBUG(dbgs() << "store ");
+ dbgs() << "store ";
if (SwapVector[EntryIdx].IsSwap)
- DEBUG(dbgs() << "swap ");
+ dbgs() << "swap ";
if (SwapVector[EntryIdx].MentionsPhysVR)
- DEBUG(dbgs() << "physreg ");
+ dbgs() << "physreg ";
if (SwapVector[EntryIdx].MentionsPartialVR)
- DEBUG(dbgs() << "partialreg ");
+ dbgs() << "partialreg ";
if (SwapVector[EntryIdx].IsSwappable) {
- DEBUG(dbgs() << "swappable ");
+ dbgs() << "swappable ";
switch(SwapVector[EntryIdx].SpecialHandling) {
default:
- DEBUG(dbgs() << "special:**unknown**");
+ dbgs() << "special:**unknown**";
break;
case SH_NONE:
break;
case SH_EXTRACT:
- DEBUG(dbgs() << "special:extract ");
+ dbgs() << "special:extract ";
break;
case SH_INSERT:
- DEBUG(dbgs() << "special:insert ");
+ dbgs() << "special:insert ";
break;
case SH_NOSWAP_LD:
- DEBUG(dbgs() << "special:load ");
+ dbgs() << "special:load ";
break;
case SH_NOSWAP_ST:
- DEBUG(dbgs() << "special:store ");
+ dbgs() << "special:store ";
break;
case SH_SPLAT:
- DEBUG(dbgs() << "special:splat ");
+ dbgs() << "special:splat ";
break;
case SH_XXPERMDI:
- DEBUG(dbgs() << "special:xxpermdi ");
+ dbgs() << "special:xxpermdi ";
break;
case SH_COPYWIDEN:
- DEBUG(dbgs() << "special:copywiden ");
+ dbgs() << "special:copywiden ";
break;
}
}
if (SwapVector[EntryIdx].WebRejected)
- DEBUG(dbgs() << "rejected ");
+ dbgs() << "rejected ";
if (SwapVector[EntryIdx].WillRemove)
- DEBUG(dbgs() << "remove ");
+ dbgs() << "remove ";
- DEBUG(dbgs() << "\n");
+ dbgs() << "\n";
// For no-asserts builds.
(void)MI;
(void)ID;
}
- DEBUG(dbgs() << "\n");
+ dbgs() << "\n";
}
+#endif
} // end default namespace
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index f8ef142255c8..d6f2672271e9 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -33,7 +33,7 @@ public:
~RISCVAsmBackend() override {}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -71,7 +71,7 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void RISCVAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
return;
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 4fc69a7fcaba..41be0a2084b3 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -44,13 +44,12 @@ static MCRegisterInfo *createRISCVMCRegisterInfo(const Triple &TT) {
static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
- MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
- return MAI;
+ return new RISCVMCAsmInfo(TT);
}
extern "C" void LLVMInitializeRISCVTargetMC() {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
- RegisterMCAsmInfoFn X(*T, createRISCVMCAsmInfo);
+ TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(*T, createRISCVMCRegisterInfo);
TargetRegistry::RegisterMCAsmBackend(*T, createRISCVAsmBackend);
diff --git a/lib/Target/RISCV/RISCVInstrFormats.td b/lib/Target/RISCV/RISCVInstrFormats.td
index 1e9bc3bf9bc5..3fab7122f6f1 100644
--- a/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/lib/Target/RISCV/RISCVInstrFormats.td
@@ -44,8 +44,9 @@ class RISCVInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// Pseudo instructions
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : RISCVInst<outs, ins, asmstr, pattern> {
+ : RISCVInst<outs, ins, "", pattern> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class FR<bits<7> funct7, bits<3> funct3, bits<7> opcode, dag outs, dag ins,
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index afbbe004186e..a20331cd0a3e 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -32,7 +32,7 @@ static std::string computeDataLayout(const Triple &TT) {
return "e-m:e-i64:64-n32:64-S128";
} else {
assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
- return "e-m:e-i64:64-n32-S128";
+ return "e-m:e-p:32:32-i64:64-n32-S128";
}
}
@@ -51,7 +51,9 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM), CM, OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()) {}
+ TLOF(make_unique<TargetLoweringObjectFileELF>()) {
+ initAsmInfo();
+}
TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
return new TargetPassConfig(this, PM);
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index e775aa607b53..7e6dff6b7894 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -9,32 +9,49 @@
#include "MCTargetDesc/SparcMCExpr.h"
#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
using namespace llvm;
// The generated AsmMatcher SparcGenAsmMatcher uses "Sparc" as the target
// namespace. But SPARC backend uses "SP" as its namespace.
namespace llvm {
- namespace Sparc {
+namespace Sparc {
+
using namespace SP;
- }
-}
+
+} // end namespace Sparc
+} // end namespace llvm
namespace {
+
class SparcOperand;
-class SparcAsmParser : public MCTargetAsmParser {
+class SparcAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
/// @name Auto-generated Match Functions
@@ -95,9 +112,10 @@ public:
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
-
};
+} // end anonymous namespace
+
static const MCPhysReg IntRegs[32] = {
Sparc::G0, Sparc::G1, Sparc::G2, Sparc::G3,
Sparc::G4, Sparc::G5, Sparc::G6, Sparc::G7,
@@ -166,6 +184,8 @@ public:
Sparc::C16_C17, Sparc::C18_C19, Sparc::C20_C21, Sparc::C22_C23,
Sparc::C24_C25, Sparc::C26_C27, Sparc::C28_C29, Sparc::C30_C31};
+namespace {
+
/// SparcOperand - Instances of this class represent a parsed Sparc machine
/// instruction.
class SparcOperand : public MCParsedAsmOperand {
@@ -219,6 +239,7 @@ private:
struct ImmOp Imm;
struct MemOp Mem;
};
+
public:
SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -464,7 +485,7 @@ public:
}
};
-} // end namespace
+} // end anonymous namespace
bool SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
@@ -591,9 +612,8 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-bool SparcAsmParser::
-ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc)
-{
+bool SparcAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
@@ -695,7 +715,7 @@ ParseDirective(AsmToken DirectiveID)
bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
+ while (true) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
return true;
@@ -717,7 +737,6 @@ bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) {
OperandMatchResultTy
SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
-
SMLoc S, E;
unsigned BaseReg = 0;
@@ -824,7 +843,6 @@ SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
OperandMatchResultTy
SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
bool isCall) {
-
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
@@ -910,11 +928,9 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
OperandMatchResultTy
SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
-
// parse (,a|,pn|,pt)+
while (getLexer().is(AsmToken::Comma)) {
-
Parser.Lex(); // Eat the comma
if (!getLexer().is(AsmToken::Identifier))
@@ -929,10 +945,8 @@ SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
return MatchOperand_Success;
}
-bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
- unsigned &RegNo,
- unsigned &RegKind)
-{
+bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
+ unsigned &RegKind) {
int64_t intVal = 0;
RegNo = 0;
RegKind = SparcOperand::rk_None;
@@ -1211,8 +1225,7 @@ static bool hasGOTReference(const MCExpr *Expr) {
const SparcMCExpr *
SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
- const MCExpr *subExpr)
-{
+ const MCExpr *subExpr) {
// When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
// If the expression contains _GLOBAL_OFFSET_TABLE, it is
// actually a %pc10 or %pc22 relocation. Otherwise, they are interpreted
@@ -1236,8 +1249,7 @@ SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
}
bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
- SMLoc &EndLoc)
-{
+ SMLoc &EndLoc) {
AsmToken Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
return false;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 6106a6c32dc8..cc07547ede2c 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -274,7 +274,8 @@ namespace {
SparcAsmBackend(T), OSType(OSType) { }
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 280c6d7937b2..3ed09898fb78 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===//
+//===- SparcMCAsmInfo.cpp - Sparc asm properties --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,10 @@
#include "SparcMCAsmInfo.h"
#include "SparcMCExpr.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Dwarf.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index ad441227600e..5e8d0cb50312 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- SparcMCAsmInfo.h - Sparc asm properties ----------------*- C++ -*--===//
+//===- SparcMCAsmInfo.h - Sparc asm properties -----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
+
class Triple;
class SparcELFMCAsmInfo : public MCAsmInfoELF {
@@ -24,6 +25,7 @@ class SparcELFMCAsmInfo : public MCAsmInfoELF {
public:
explicit SparcELFMCAsmInfo(const Triple &TheTriple);
+
const MCExpr*
getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const override;
@@ -33,6 +35,6 @@ public:
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCASMINFO_H
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 86341c61d1e2..684f66970dbe 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -11,20 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "SparcMCExpr.h"
#include "MCTargetDesc/SparcFixupKinds.h"
+#include "SparcMCExpr.h"
#include "SparcMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -33,17 +42,17 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
+
class SparcMCCodeEmitter : public MCCodeEmitter {
- SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete;
- void operator=(const SparcMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
public:
SparcMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), Ctx(ctx) {}
-
- ~SparcMCCodeEmitter() override {}
+ SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete;
+ SparcMCCodeEmitter &operator=(const SparcMCCodeEmitter &) = delete;
+ ~SparcMCCodeEmitter() override = default;
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -79,13 +88,8 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-} // end anonymous namespace
-MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new SparcMCCodeEmitter(MCII, Ctx);
-}
+} // end anonymous namespace
void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -121,12 +125,10 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
-
unsigned SparcMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
@@ -209,6 +211,7 @@ getBranchPredTargetOpValue(const MCInst &MI, unsigned OpNo,
(MCFixupKind)Sparc::fixup_sparc_br19));
return 0;
}
+
unsigned SparcMCCodeEmitter::
getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -227,3 +230,9 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SparcGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new SparcMCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 122f830e0dc5..c07cc213c3ed 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -288,11 +288,11 @@ static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
{
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
- if (!MRI->reg_nodbg_empty(reg))
+ if (MRI->isPhysRegUsed(reg))
return false;
for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
- if (!MRI->reg_nodbg_empty(reg))
+ if (MRI->isPhysRegUsed(reg))
return false;
return true;
@@ -305,8 +305,8 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineFrameInfo &MFI = MF.getFrameInfo();
return !(MFI.hasCalls() // has calls
- || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
- || !MRI.reg_nodbg_empty(SP::O6) // %SP is used
+ || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
+ || MRI.isPhysRegUsed(SP::O6) // %SP is used
|| hasFP(MF)); // need %FP
}
@@ -314,11 +314,10 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
- if (MRI.reg_nodbg_empty(reg))
+ if (!MRI.isPhysRegUsed(reg))
continue;
unsigned mapped_reg = reg - SP::I0 + SP::O0;
- assert(MRI.reg_nodbg_empty(mapped_reg));
// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 2ac9aae2471b..455d1ee1564a 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1877,6 +1877,7 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
APInt KnownZero2, KnownOne2;
@@ -2177,8 +2178,8 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
if (!Subtarget->is64Bit())
- Entry.isSRet = true;
- Entry.isReturned = false;
+ Entry.IsSRet = true;
+ Entry.IsReturned = false;
Args.push_back(Entry);
RetTyABI = Type::getVoidTy(*DAG.getContext());
}
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index e0a421b83712..90d03984060c 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -68,6 +68,7 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index a94717c93456..3f91ca9035a6 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -8,16 +8,31 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
using namespace llvm;
@@ -31,6 +46,7 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
}
namespace {
+
enum RegisterKind {
GR32Reg,
GRH32Reg,
@@ -56,7 +72,6 @@ enum MemoryKind {
};
class SystemZOperand : public MCParsedAsmOperand {
-public:
private:
enum OperandKind {
KindInvalid,
@@ -140,12 +155,14 @@ public:
SMLoc EndLoc) {
return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
}
+
static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) {
auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc);
Op->Token.Data = Str.data();
Op->Token.Length = Str.size();
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
@@ -153,12 +170,14 @@ public:
Op->Reg.Num = Num;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
Op->Imm = Expr;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm,
@@ -175,6 +194,7 @@ public:
Op->Mem.Length.Reg = LengthReg;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
SMLoc StartLoc, SMLoc EndLoc) {
@@ -503,6 +523,7 @@ public:
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
}
};
+
} // end anonymous namespace
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 1806e015f61e..a281a0aa6bcc 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZ.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -21,17 +25,19 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+
class SystemZDisassembler : public MCDisassembler {
public:
SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- ~SystemZDisassembler() override {}
+ ~SystemZDisassembler() override = default;
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
+
} // end anonymous namespace
static MCDisassembler *createSystemZDisassembler(const Target &T,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 1207c7b327e8..6cd12e13e220 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===//
+//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,10 +10,13 @@
#include "SystemZInstPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 6336f5ee0efa..d65c661545eb 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -15,8 +15,10 @@
#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
+#include <cstdint>
namespace llvm {
+
class MCOperand;
class SystemZInstPrinter : public MCInstPrinter {
@@ -70,6 +72,7 @@ private:
// This forms part of the instruction name rather than the operand list.
void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 9192448afd04..23b7d5b5d501 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -51,7 +51,7 @@ public:
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override {
return false;
}
@@ -91,7 +91,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
MCFixupKind Kind = Fixup.getKind();
unsigned Offset = Fixup.getOffset();
unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
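
(Aside, not part of the patch: the applyFixup hunk above threads an MCContext through so fixup problems can become diagnostics rather than asserts. A minimal sketch of how a backend might use that parameter; the range check and message are illustrative, and only MCContext::reportError, MCFixup::getLoc and the MathExtras helpers are assumed from the LLVM API.)

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

// Sketch: reject a fixup value that does not fit in BitSize bits, reporting
// the problem at the fixup's source location instead of asserting.
static bool checkFixupValue(const MCFixup &Fixup, uint64_t Value,
                            unsigned BitSize, MCContext &Ctx) {
  if (isUIntN(BitSize, Value) || isIntN(BitSize, static_cast<int64_t>(Value)))
    return true;
  Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
  return false;
}
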
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 7082abad716d..092eb4011adc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -11,20 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
namespace {
+
class SystemZMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
MCContext &Ctx;
@@ -34,7 +42,7 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~SystemZMCCodeEmitter() override {}
+ ~SystemZMCCodeEmitter() override = default;
// Override MCCodeEmitter.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -137,13 +145,8 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-} // end anonymous namespace
-MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new SystemZMCCodeEmitter(MCII, Ctx);
-}
+} // end anonymous namespace
void SystemZMCCodeEmitter::
encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -282,3 +285,9 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SystemZGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new SystemZMCCodeEmitter(MCII, Ctx);
+}
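
(Aside, not part of the patch: several classes in this series, SystemZDisassembler, SystemZMCCodeEmitter and SystemZObjectWriter among them, replace empty destructor bodies with "= default". A one-line illustration with a made-up class name; only MCCodeEmitter is assumed from the LLVM MC API.)

#include "llvm/MC/MCCodeEmitter.h"

namespace {

class ExampleCodeEmitter : public llvm::MCCodeEmitter {
public:
  // Same behavior as the empty "{}" body it replaces, but explicitly trivial
  // and clearly intentional.
  ~ExampleCodeEmitter() override = default;
};

} // end anonymous namespace
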
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 43a96e84289c..3de570bf30cc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -7,35 +7,38 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
+
class SystemZObjectWriter : public MCELFObjectTargetWriter {
public:
SystemZObjectWriter(uint8_t OSABI);
-
- ~SystemZObjectWriter() override;
+ ~SystemZObjectWriter() override = default;
protected:
// Override MCELFObjectTargetWriter.
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
};
+
} // end anonymous namespace
SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
/*HasRelocationAddend=*/ true) {}
-SystemZObjectWriter::~SystemZObjectWriter() {
-}
-
// Return the relocation type for an absolute value of MCFixupKind Kind.
static unsigned getAbsoluteReloc(unsigned Kind) {
switch (Kind) {
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index b4c843f658aa..d70f9e90cd3e 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -13,15 +13,23 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -33,11 +41,11 @@ STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
namespace {
+
// Represents the references to a particular register in one or more
// instructions.
struct Reference {
- Reference()
- : Def(false), Use(false) {}
+ Reference() = default;
Reference &operator|=(const Reference &Other) {
Def |= Other.Def;
@@ -49,15 +57,16 @@ struct Reference {
// True if the register is defined or used in some form, either directly or
// via a sub- or super-register.
- bool Def;
- bool Use;
+ bool Def = false;
+ bool Use = false;
};
class SystemZElimCompare : public MachineFunctionPass {
public:
static char ID;
+
SystemZElimCompare(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {}
+ : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "SystemZ Comparison Elimination";
@@ -65,6 +74,7 @@ public:
bool processBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -84,16 +94,13 @@ private:
bool fuseCompareOperations(MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
- const SystemZInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const SystemZInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
};
char SystemZElimCompare::ID = 0;
-} // end anonymous namespace
-FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
- return new SystemZElimCompare(TM);
-}
+} // end anonymous namespace
// Return true if CC is live out of MBB.
static bool isCCLiveOut(MachineBasicBlock &MBB) {
@@ -167,7 +174,7 @@ static unsigned getCompareSourceReg(MachineInstr &Compare) {
reg = Compare.getOperand(0).getReg();
else if (isLoadAndTestAsCmp(Compare))
reg = Compare.getOperand(1).getReg();
- assert (reg);
+ assert(reg);
return reg;
}
@@ -216,9 +223,7 @@ bool SystemZElimCompare::convertToBRCT(
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(BRCT));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
- MIB.addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(Target);
+ MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target);
// Add a CC def to BRCT(G), since we may have to split them again if the
// branch displacement overflows. BRCTH has a 32-bit displacement, so
// this is not necessary there.
@@ -261,10 +266,10 @@ bool SystemZElimCompare::convertToLoadAndTrap(
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(LATOpcode));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3));
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
MI.eraseFromParent();
return true;
}
@@ -368,10 +373,8 @@ static bool isCompareZero(MachineInstr &Compare) {
return true;
default:
-
if (isLoadAndTestAsCmp(Compare))
return true;
-
return Compare.getNumExplicitOperands() == 2 &&
Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
}
@@ -502,15 +505,15 @@ bool SystemZElimCompare::fuseCompareOperations(
Branch->setDesc(TII->get(FusedOpcode));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
for (unsigned I = 0; I < SrcNOps; I++)
- MIB.addOperand(Compare.getOperand(I));
- MIB.addOperand(CCMask);
+ MIB.add(Compare.getOperand(I));
+ MIB.add(CCMask);
if (Type == SystemZII::CompareAndBranch) {
// Only conditional branches define CC, as they may be converted back
// to a non-fused branch because of a long displacement. Conditional
// returns don't have that problem.
- MIB.addOperand(Target)
- .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ MIB.add(Target).addReg(SystemZ::CC,
+ RegState::ImplicitDefine | RegState::Dead);
}
if (Type == SystemZII::CompareAndSibcall)
@@ -573,3 +576,7 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
return Changed;
}
+
+FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
+ return new SystemZElimCompare(TM);
+}
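
(Aside, not part of the patch: the hunks above replace addOperand() chains with MachineInstrBuilder::add(). A minimal sketch of that pattern under hypothetical names; only BuildMI-style builders, MachineInstr and MachineInstrBuilder::add are assumed from the LLVM CodeGen API.)

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Copy every explicit operand of Old onto New. add() preserves the operand
// kind (register, immediate, MBB, ...) and all register flags, which is what
// the addOperand() chains being rewritten above did.
static void copyExplicitOperands(MachineFunction &MF, MachineInstr &Old,
                                 MachineInstr &New) {
  MachineInstrBuilder MIB(MF, &New);
  for (unsigned I = 0, E = Old.getNumExplicitOperands(); I != E; ++I)
    MIB.add(Old.getOperand(I));
}
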
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2d0a06af18ae..84d3c7bed50a 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, VT, Custom);
// Only z196 and above have native support for conversions to unsigned.
+ // On z10, promoting to i64 doesn't generate an inexact condition for
+ // values that are outside the i32 range but in the i64 range, so use
+ // the default expansion.
if (!Subtarget.hasFPExtension())
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
@@ -344,9 +347,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// There should be no need to check for float types other than v2f64
// since <2 x f32> isn't a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
}
// Handle floating-point types.
@@ -2789,8 +2796,9 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
// but we need this case for bitcasts that are created during lowering
// and which are then lowered themselves.
if (auto *LoadN = dyn_cast<LoadSDNode>(In))
- return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
- LoadN->getMemOperand());
+ if (ISD::isNormalLoad(LoadN))
+ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
+ LoadN->getMemOperand());
if (InVT == MVT::i32 && ResVT == MVT::f32) {
SDValue In64;
@@ -3802,7 +3810,7 @@ namespace {
struct GeneralShuffle {
GeneralShuffle(EVT vt) : VT(vt) {}
void addUndef();
- void add(SDValue, unsigned);
+ bool add(SDValue, unsigned);
SDValue getNode(SelectionDAG &, const SDLoc &);
// The operands of the shuffle.
@@ -3828,8 +3836,10 @@ void GeneralShuffle::addUndef() {
// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
-// type as the result.
-void GeneralShuffle::add(SDValue Op, unsigned Elem) {
+// type as the result. Aborts and returns false if the source vector elements
+// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
+// LLVM they become implicitly extended, but this is rare and not optimized.
+bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
// The source vector can have wider elements than the result,
@@ -3837,8 +3847,12 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
// We want the least significant part.
EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
- assert(FromBytesPerElement >= BytesPerElement &&
- "Invalid EXTRACT_VECTOR_ELT");
+
+ // Return false if the source elements are smaller than their destination
+ // elements.
+ if (FromBytesPerElement < BytesPerElement)
+ return false;
+
unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
(FromBytesPerElement - BytesPerElement));
@@ -3856,13 +3870,13 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
break;
if (NewByte < 0) {
addUndef();
- return;
+ return true;
}
Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
Byte = unsigned(NewByte) % SystemZ::VectorBytes;
} else if (Op.isUndef()) {
addUndef();
- return;
+ return true;
} else
break;
}
@@ -3879,6 +3893,8 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
for (unsigned I = 0; I < BytesPerElement; ++I)
Bytes.push_back(Base + I);
+
+ return true;
}
// Return SDNodes for the completed shuffle.
@@ -4110,12 +4126,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op.getOperand(1).getOpcode() == ISD::Constant) {
unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- GS.add(Op.getOperand(0), Elem);
+ if (!GS.add(Op.getOperand(0), Elem))
+ return SDValue();
FoundOne = true;
} else if (Op.isUndef()) {
GS.addUndef();
} else {
- GS.add(SDValue(), ResidueOps.size());
+ if (!GS.add(SDValue(), ResidueOps.size()))
+ return SDValue();
ResidueOps.push_back(BVN->getOperand(I));
}
}
@@ -4354,9 +4372,9 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
int Elt = VSN->getMaskElt(I);
if (Elt < 0)
GS.addUndef();
- else
- GS.add(Op.getOperand(unsigned(Elt) / NumElements),
- unsigned(Elt) % NumElements);
+ else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
+ unsigned(Elt) % NumElements))
+ return SDValue();
}
return GS.getNode(DAG, SDLoc(VSN));
}
@@ -4722,9 +4740,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
// Return true if VT is a vector whose elements are a whole number of bytes
-// in width.
-static bool canTreatAsByteVector(EVT VT) {
- return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0;
+// in width. Also check for presence of vector support.
+bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
+ if (!Subtarget.hasVector())
+ return false;
+
+ return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}
// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
@@ -4986,6 +5007,10 @@ SDValue SystemZTargetLowering::combineSTORE(
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
SDNode *N, DAGCombinerInfo &DCI) const {
+
+ if (!Subtarget.hasVector())
+ return SDValue();
+
// Try to simplify a vector extraction.
if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
SDValue Op0 = N->getOperand(0);
@@ -5233,7 +5258,7 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
- .addOperand(Base)
+ .add(Base)
.addImm(0)
.addReg(0);
return Reg;
@@ -5322,8 +5347,11 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
if (Invert)
CCMask ^= CCValid;
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
- .addReg(SrcReg).addOperand(Base).addImm(Disp)
- .addImm(CCValid).addImm(CCMask);
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addImm(CCValid)
+ .addImm(CCMask);
MI.eraseFromParent();
return MBB;
}
@@ -5350,7 +5378,10 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
// # fallthrough to JoinMBB
MBB = FalseMBB;
BuildMI(MBB, DL, TII->get(StoreOpcode))
- .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addReg(IndexReg);
MBB->addSuccessor(JoinMBB);
MI.eraseFromParent();
@@ -5415,8 +5446,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// %OrigVal = L Disp(%Base)
// # fall through to LoopMMB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5437,8 +5467,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
if (Invert) {
// Perform the operation normally and then invert every bit of the field.
unsigned Tmp = MRI.createVirtualRegister(RC);
- BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
- .addReg(RotatedOldVal).addOperand(Src2);
+ BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
if (BitSize <= 32)
// XILF with the upper BitSize bits set.
BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
@@ -5454,7 +5483,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
} else if (BinOpcode)
// A simple binary operation.

BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
- .addReg(RotatedOldVal).addOperand(Src2);
+ .addReg(RotatedOldVal)
+ .add(Src2);
else if (IsSubWord)
// Use RISBG to rotate Src2 into position and use it to replace the
// field in RotatedOldVal.
@@ -5465,7 +5495,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
- .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5533,8 +5566,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// %OrigVal = L Disp(%Base)
// # fall through to LoopMMB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5581,7 +5613,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
- .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5642,7 +5677,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// # fall through to LoopMMB
MBB = StartMBB;
BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ .add(Base)
+ .addImm(Disp)
+ .addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5696,7 +5733,10 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
.addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
- .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(StoreVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5869,7 +5909,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (!isUInt<12>(DestDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(DestBase)
+ .add(DestBase)
.addImm(DestDisp)
.addReg(0);
DestBase = MachineOperand::CreateReg(Reg, false);
@@ -5878,15 +5918,18 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (!isUInt<12>(SrcDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(SrcBase)
+ .add(SrcBase)
.addImm(SrcDisp)
.addReg(0);
SrcBase = MachineOperand::CreateReg(Reg, false);
SrcDisp = 0;
}
BuildMI(*MBB, MI, DL, TII->get(Opcode))
- .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
- .addOperand(SrcBase).addImm(SrcDisp);
+ .add(DestBase)
+ .addImm(DestDisp)
+ .addImm(ThisLength)
+ .add(SrcBase)
+ .addImm(SrcDisp);
DestDisp += ThisLength;
SrcDisp += ThisLength;
Length -= ThisLength;
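
(Aside, not part of the patch: the GeneralShuffle::add change above turns a hard assert into a recoverable failure, so callers return an empty SDValue and fall back to the generic expansion. A sketch of that shape with a hypothetical collector; only SDValue, SDNode and SelectionDAG are assumed from the SelectionDAG API.)

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Hypothetical per-element collector in the spirit of GeneralShuffle::add():
// it refuses inputs it cannot model instead of asserting on them.
static bool tryAddElement(SDValue Op, unsigned /*Elem*/) {
  return !Op.getNode() || Op.getValueType().isVector();
}

// A combine built on such a collector backs out with an empty SDValue, which
// tells the DAG combiner "no transformation" and keeps the default lowering.
static SDValue tryCombineBuildVector(SDNode *N, SelectionDAG &DAG) {
  for (const SDValue &Op : N->op_values())
    if (!tryAddElement(Op, 0))
      return SDValue();
  // A real combine would construct and return the optimized node here.
  return SDValue();
}
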
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 7a21a474c119..7d92a7355877 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -537,6 +537,7 @@ private:
unsigned UnpackHigh) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
+ bool canTreatAsByteVector(EVT VT) const;
SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
unsigned Index, DAGCombinerInfo &DCI,
bool Force) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 3565d5f2c49c..c8ff9558cc88 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -11,12 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "SystemZInstrInfo.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
#include "SystemZInstrBuilder.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -58,12 +79,25 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI);
MBB->insert(MI, EarlierMI);
- // Set up the two 64-bit registers.
+ // Set up the two 64-bit registers and remember super reg and its flags.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
+ unsigned Reg128 = LowRegOp.getReg();
+ unsigned Reg128Killed = getKillRegState(LowRegOp.isKill());
+ unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef());
HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
+ if (MI->mayStore()) {
+ // Add implicit uses of the super register in case one of the subregs is
+ // undefined. We could track liveness and skip storing an undefined
+ // subreg, but this is hopefully rare (discovered with llvm-stress).
+ // If Reg128 was killed, set kill flag on MI.
+ unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit);
+ MachineInstrBuilder(MF, EarlierMI).addReg(Reg128, Reg128UndefImpl);
+ MachineInstrBuilder(MF, MI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed));
+ }
+
// The address in the first (high) instruction is already correct.
// Adjust the offset in the second (low) instruction.
MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
@@ -131,7 +165,8 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
MI.setDesc(get(LowOpcodeK));
else {
emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
- SystemZ::LR, 32, MI.getOperand(1).isKill());
+ SystemZ::LR, 32, MI.getOperand(1).isKill(),
+ MI.getOperand(1).isUndef());
MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
MI.getOperand(1).setReg(DestReg);
MI.tieOperands(0, 1);
@@ -185,9 +220,15 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
// are low registers, otherwise use RISB[LH]G.
void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const {
- emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
- MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
- Size, MI.getOperand(1).isKill());
+ MachineInstrBuilder MIB =
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
+ MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
+ Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef());
+
+ // Keep the remaining operands as-is.
+ for (unsigned I = 2; I < MI.getNumOperands(); ++I)
+ MIB.add(MI.getOperand(I));
+
MI.eraseFromParent();
}
@@ -227,11 +268,13 @@ void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
// are low registers, otherwise use RISB[LH]G. Size is the number of bits
// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
// KillSrc is true if this move is the last use of SrcReg.
-void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, unsigned LowLowOpcode,
- unsigned Size, bool KillSrc) const {
+MachineInstrBuilder
+SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, unsigned LowLowOpcode,
+ unsigned Size, bool KillSrc,
+ bool UndefSrc) const {
unsigned Opcode;
bool DestIsHigh = isHighReg(DestReg);
bool SrcIsHigh = isHighReg(SrcReg);
@@ -242,18 +285,16 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
else if (!DestIsHigh && SrcIsHigh)
Opcode = SystemZ::RISBLH;
else {
- BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
+ return BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc));
}
unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
- BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ return BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
.addReg(DestReg, RegState::Undef)
- .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc))
.addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
}
-
MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
@@ -282,7 +323,6 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
}
}
-
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@@ -586,7 +626,6 @@ bool SystemZInstrInfo::optimizeCompareInstr(
removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
-
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
unsigned TrueReg, unsigned FalseReg,
@@ -640,6 +679,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
else {
Opc = SystemZ::LOCR;
MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
+ unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg);
+ TrueReg = TReg;
+ FalseReg = FReg;
}
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
Opc = SystemZ::LOCGR;
@@ -706,7 +751,7 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
return true;
}
-bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
+bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (Opcode == SystemZ::Return ||
Opcode == SystemZ::Trap ||
@@ -780,10 +825,11 @@ bool SystemZInstrInfo::PredicateInstruction(
MI.RemoveOperand(0);
MI.setDesc(get(SystemZ::CallBRCL));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid).addImm(CCMask)
- .addOperand(FirstOp)
- .addRegMask(RegMask)
- .addReg(SystemZ::CC, RegState::Implicit);
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .add(FirstOp)
+ .addRegMask(RegMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
return true;
}
if (Opcode == SystemZ::CallBR) {
@@ -813,7 +859,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
- emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc);
+ emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc,
+ false);
return;
}
@@ -888,15 +935,19 @@ static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
}
namespace {
+
struct LogicOp {
- LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {}
+ LogicOp() = default;
LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
: RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
explicit operator bool() const { return RegSize; }
- unsigned RegSize, ImmLSB, ImmSize;
+ unsigned RegSize = 0;
+ unsigned ImmLSB = 0;
+ unsigned ImmSize = 0;
};
+
} // end anonymous namespace
static LogicOp interpretAndImmediate(unsigned Opcode) {
@@ -976,12 +1027,12 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineInstrBuilder MIB(
*MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
/*NoImplicit=*/true));
- MIB.addOperand(Dest);
+ MIB.add(Dest);
// Keep the kill state, but drop the tied flag.
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
// Keep the remaining operands as-is.
for (unsigned I = 2; I < NumOps; ++I)
- MIB.addOperand(MI.getOperand(I));
+ MIB.add(MI.getOperand(I));
MBB->insert(MI, MIB);
return finishConvertToThreeAddress(&MI, MIB, LV);
}
@@ -1009,7 +1060,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineOperand &Src = MI.getOperand(1);
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addReg(Src.getReg(), getKillRegState(Src.isKill()),
Src.getSubReg())
@@ -1040,7 +1091,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
++CCUnit;
- assert (!CCUnit.isValid() && "CC only has one reg unit.");
+ assert(!CCUnit.isValid() && "CC only has one reg unit.");
SlotIndex MISlot =
LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
if (!CCLiveRange.liveAt(MISlot)) {
@@ -1091,7 +1142,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(StoreOpcode))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addFrameIndex(FrameIndex)
.addImm(0)
.addReg(0);
@@ -1100,12 +1151,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// destination register instead.
if (OpNum == 1) {
unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
- unsigned Dest = MI.getOperand(0).getReg();
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
- get(LoadOpcode), Dest)
- .addFrameIndex(FrameIndex)
- .addImm(0)
- .addReg(0);
+ get(LoadOpcode))
+ .add(MI.getOperand(0))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addReg(0);
}
}
@@ -1132,7 +1183,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
.addFrameIndex(FrameIndex)
.addImm(0)
.addImm(Size)
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(MI.getOperand(2).getImm())
.addMemOperand(MMO);
}
@@ -1140,7 +1191,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) {
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(SystemZ::MVC))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(MI.getOperand(2).getImm())
.addImm(Size)
.addFrameIndex(FrameIndex)
@@ -1164,7 +1215,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode));
for (unsigned I = 0; I < OpNum; ++I)
- MIB.addOperand(MI.getOperand(I));
+ MIB.add(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
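
(Aside, not part of the patch: the splitMove hunk above attaches the original 128-bit register as an implicit use on both halves of the split access so liveness stays correct when a subreg is undef. A sketch of composing the register-state flags the same way, with hypothetical names; RegState, getKillRegState, getUndefRegState and addReg are standard MachineInstrBuilder helpers.)

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Add SuperReg as an implicit use on both instructions, propagating undef,
// and put the kill flag only on the last instruction that touches it.
static void addImplicitSuperRegUse(MachineFunction &MF, MachineInstr &First,
                                   MachineInstr &Second, unsigned SuperReg,
                                   bool WasKill, bool WasUndef) {
  unsigned Flags = RegState::Implicit | getUndefRegState(WasUndef);
  MachineInstrBuilder(MF, &First).addReg(SuperReg, Flags);
  MachineInstrBuilder(MF, &Second).addReg(SuperReg,
                                          Flags | getKillRegState(WasKill));
}
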
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 794b193a501e..b8be1f5f3921 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -16,16 +16,22 @@
#include "SystemZ.h"
#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "SystemZGenInstrInfo.inc"
namespace llvm {
-class SystemZTargetMachine;
+class SystemZSubtarget;
namespace SystemZII {
+
enum {
// See comments in SystemZInstrFormats.td.
SimpleBDXLoad = (1 << 0),
@@ -43,12 +49,15 @@ enum {
CCMaskLast = (1 << 19),
IsLogical = (1 << 20)
};
+
static inline unsigned getAccessSize(unsigned int Flags) {
return (Flags & AccessSizeMask) >> AccessSizeShift;
}
+
static inline unsigned getCCValues(unsigned int Flags) {
return (Flags & CCValuesMask) >> CCValuesShift;
}
+
static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift;
}
@@ -64,6 +73,7 @@ enum {
// @INDNTPOFF
MO_INDNTPOFF = (2 << 0)
};
+
// Classifies a branch.
enum BranchType {
// An instruction that branches on the current value of CC.
@@ -93,6 +103,7 @@ enum BranchType {
// the result is nonzero.
BranchCTG
};
+
// Information about a branch instruction.
struct Branch {
// The type of the branch.
@@ -111,6 +122,7 @@ struct Branch {
const MachineOperand *target)
: Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
};
+
// Kinds of fused compares in compare-and-* instructions. Together with type
// of the converted compare, this identifies the compare-and-*
// instruction.
@@ -127,9 +139,9 @@ enum FusedCompareType {
// Trap
CompareAndTrap
};
+
} // end namespace SystemZII
-class SystemZSubtarget;
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
SystemZSubtarget &STI;
@@ -149,9 +161,13 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
- void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
+
+ MachineInstrBuilder
+ emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ unsigned LowLowOpcode, unsigned Size, bool KillSrc,
+ bool UndefSrc) const;
+
virtual void anchor();
protected:
@@ -203,7 +219,7 @@ public:
unsigned FalseReg) const override;
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override;
@@ -304,6 +320,7 @@ public:
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 738ea7a33729..0158fe6aec08 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -56,17 +56,28 @@ def : VectorExtractSubreg<v4i32, VLGVF>;
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
- // Generate byte mask.
- def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
- def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
- def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
-
- // Generate mask.
- def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
- def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
- def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
- def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
- def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in {
+
+ // Generate byte mask.
+ def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
+ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+
+ // Generate mask.
+ def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
+ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
+ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
+ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+
+ // Replicate immediate.
+ def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
+ }
// Load element immediate.
//
@@ -86,13 +97,6 @@ let Predicates = [FeatureVector] in {
def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
v128g, v128g, imm64sx16, imm32zx1>;
}
-
- // Replicate immediate.
- def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
- def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
- def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
- def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
- def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 14ff6afbd4ae..791f0334e0f1 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -53,15 +53,21 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -70,72 +76,72 @@ using namespace llvm;
STATISTIC(LongBranches, "Number of long branches.");
namespace {
+
// Represents positional information about a basic block.
struct MBBInfo {
// The address that we currently assume the block has.
- uint64_t Address;
+ uint64_t Address = 0;
// The size of the block in bytes, excluding terminators.
// This value never changes.
- uint64_t Size;
+ uint64_t Size = 0;
// The minimum alignment of the block, as a log2 value.
// This value never changes.
- unsigned Alignment;
+ unsigned Alignment = 0;
// The number of terminators in this block. This value never changes.
- unsigned NumTerminators;
+ unsigned NumTerminators = 0;
- MBBInfo()
- : Address(0), Size(0), Alignment(0), NumTerminators(0) {}
+ MBBInfo() = default;
};
// Represents the state of a block terminator.
struct TerminatorInfo {
// If this terminator is a relaxable branch, this points to the branch
// instruction, otherwise it is null.
- MachineInstr *Branch;
+ MachineInstr *Branch = nullptr;
// The address that we currently assume the terminator has.
- uint64_t Address;
+ uint64_t Address = 0;
// The current size of the terminator in bytes.
- uint64_t Size;
+ uint64_t Size = 0;
// If Branch is nonnull, this is the number of the target block,
// otherwise it is unused.
- unsigned TargetBlock;
+ unsigned TargetBlock = 0;
// If Branch is nonnull, this is the length of the longest relaxed form,
// otherwise it is zero.
- unsigned ExtraRelaxSize;
+ unsigned ExtraRelaxSize = 0;
- TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0),
- ExtraRelaxSize(0) {}
+ TerminatorInfo() = default;
};
// Used to keep track of the current position while iterating over the blocks.
struct BlockPosition {
// The address that we assume this position has.
- uint64_t Address;
+ uint64_t Address = 0;
// The number of low bits in Address that are known to be the same
// as the runtime address.
unsigned KnownBits;
- BlockPosition(unsigned InitialAlignment)
- : Address(0), KnownBits(InitialAlignment) {}
+ BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {}
};
class SystemZLongBranch : public MachineFunctionPass {
public:
static char ID;
+
SystemZLongBranch(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr) {}
+ : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "SystemZ Long Branch"; }
bool runOnMachineFunction(MachineFunction &F) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -155,7 +161,7 @@ private:
void relaxBranch(TerminatorInfo &Terminator);
void relaxBranches();
- const SystemZInstrInfo *TII;
+ const SystemZInstrInfo *TII = nullptr;
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBs;
SmallVector<TerminatorInfo, 16> Terminators;
@@ -165,11 +171,8 @@ char SystemZLongBranch::ID = 0;
const uint64_t MaxBackwardRange = 0x10000;
const uint64_t MaxForwardRange = 0xfffe;
-} // end anonymous namespace
-FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
- return new SystemZLongBranch(TM);
-}
+} // end anonymous namespace
// Position describes the state immediately before Block. Update Block
// accordingly and move Position to the end of the block's non-terminator
@@ -354,13 +357,13 @@ void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(-1);
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(-1);
MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
- .addImm(SystemZ::CCMASK_ICMP)
- .addImm(SystemZ::CCMASK_CMP_NE)
- .addOperand(MI->getOperand(2));
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_NE)
+ .add(MI->getOperand(2));
// The implicit use of CC is a killing use.
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
MI->eraseFromParent();
@@ -373,12 +376,12 @@ void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1));
MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
- .addImm(SystemZ::CCMASK_ICMP)
- .addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(3));
+ .addImm(SystemZ::CCMASK_ICMP)
+ .add(MI->getOperand(2))
+ .add(MI->getOperand(3));
// The implicit use of CC is a killing use.
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
MI->eraseFromParent();
@@ -463,3 +466,7 @@ bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
relaxBranches();
return true;
}
+
+FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
+ return new SystemZLongBranch(TM);
+}
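
(Aside, not part of the patch: MBBInfo, TerminatorInfo, Reference, LogicOp and Candidate all switch from hand-written constructors to default member initializers plus "= default". A tiny illustrative struct in that style; the name and fields here are made up, not from the patch.)

#include <cstdint>

// Default member initializers give the same zeroed state as the old
// constructor, with the value documented right next to each field.
struct BlockSummary {
  uint64_t Address = 0;        // assumed address of the block
  uint64_t Size = 0;           // size in bytes, excluding terminators
  unsigned NumTerminators = 0; // fixed once the block has been scanned

  BlockSummary() = default;
};
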
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index b919758b70e7..12357e0348a9 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -1,4 +1,4 @@
-//==-- SystemZMachineScheduler.h - SystemZ Scheduler Interface -*- C++ -*---==//
+//==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -14,10 +14,10 @@
// usage of processor resources.
//===----------------------------------------------------------------------===//
-#include "SystemZInstrInfo.h"
#include "SystemZHazardRecognizer.h"
#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <set>
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
@@ -28,29 +28,29 @@ namespace llvm {
/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
class SystemZPostRASchedStrategy : public MachineSchedStrategy {
- ScheduleDAGMI *DAG;
+ ScheduleDAGMI *DAG;
/// A candidate during instruction evaluation.
struct Candidate {
- SUnit *SU;
+ SUnit *SU = nullptr;
/// The decoding cost.
- int GroupingCost;
+ int GroupingCost = 0;
/// The processor resources cost.
- int ResourcesCost;
+ int ResourcesCost = 0;
- Candidate() : SU(nullptr), GroupingCost(0), ResourcesCost(0) {}
+ Candidate() = default;
Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec);
// Compare two candidates.
bool operator<(const Candidate &other);
// Check if this node is free of cost ("as good as any").
- bool inline noCost() {
+ bool noCost() const {
return (GroupingCost <= 0 && !ResourcesCost);
}
- };
+ };
// A sorter for the Available set that makes sure that SUs are considered
// in the best order.
@@ -83,7 +83,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
// region.
SystemZHazardRecognizer HazardRec;
- public:
+public:
SystemZPostRASchedStrategy(const MachineSchedContext *C);
/// PostRA scheduling does not track pressure.
@@ -107,6 +107,6 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
void releaseBottomNode(SUnit *SU) override {};
};
-} // namespace llvm
+} // end namespace llvm
-#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H */
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index e97d61d8355d..7aee6f52e9a7 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -855,8 +855,8 @@ def : InstRW<[VecXsPm], (instregex "VZERO$")>;
def : InstRW<[VecXsPm], (instregex "VONE$")>;
def : InstRW<[VecXsPm], (instregex "VGBM$")>;
def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>;
-def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
//===----------------------------------------------------------------------===//
// Vector: Loads
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 83882fc0310a..263aff8b7bfb 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -167,10 +167,10 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
MI.RemoveOperand(0);
MI.setDesc(TII->get(Opcode));
MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
- .addOperand(Dest)
- .addOperand(Mode)
- .addOperand(Src)
- .addOperand(Suppress);
+ .add(Dest)
+ .add(Mode)
+ .add(Src)
+ .add(Suppress);
return true;
}
return false;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 33fdb8f90825..ede5005fa491 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,14 +7,25 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
+#include "SystemZMachineScheduler.h"
#include "SystemZTargetMachine.h"
#include "SystemZTargetTransformInfo.h"
-#include "SystemZMachineScheduler.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include <string>
using namespace llvm;
@@ -48,7 +59,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
StringRef FS) {
bool VectorABI = UsesVectorABI(CPU, FS);
- std::string Ret = "";
+ std::string Ret;
// Big endian.
Ret += "E";
@@ -96,14 +107,15 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
getEffectiveRelocModel(RM), CM, OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
-SystemZTargetMachine::~SystemZTargetMachine() {}
+SystemZTargetMachine::~SystemZTargetMachine() = default;
namespace {
+
/// SystemZ Code Generator Pass Configuration Options.
class SystemZPassConfig : public TargetPassConfig {
public:
@@ -116,7 +128,8 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
- return new ScheduleDAGMI(C, make_unique<SystemZPostRASchedStrategy>(C),
+ return new ScheduleDAGMI(C,
+ llvm::make_unique<SystemZPostRASchedStrategy>(C),
/*RemoveKillFlags=*/true);
}
@@ -126,6 +139,7 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
+
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
@@ -157,7 +171,6 @@ void SystemZPassConfig::addPreSched2() {
}
void SystemZPassConfig::addPreEmitPass() {
-
// Do instruction shortening before compare elimination because some
// vector instructions will be shortened into opcodes that compare
// elimination recognizes.
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 69cf9bc6e525..a10ca64fa632 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -1,4 +1,4 @@
-//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,18 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
#include "SystemZSubtarget.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
-class TargetFrameLowering;
-
class SystemZTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- SystemZSubtarget Subtarget;
+ SystemZSubtarget Subtarget;
public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -34,20 +37,22 @@ public:
~SystemZTargetMachine() override;
const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
+
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
bool targetSchedulesPostRAScheduling() const override { return true; };
-
};
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index b10c0e09a0d4..e74c9a80515d 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -259,11 +259,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
}
}
if (isa<StoreInst>(&I)) {
- NumStores++;
Type *MemAccessTy = I.getOperand(0)->getType();
- if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
- (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
- NumStores++; // 128 bit fp/int stores get split.
+ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
}
}
@@ -313,3 +310,547 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
return 0;
}
+int SystemZTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
+
+ // TODO: return a good value for BB-VECTORIZER that includes the
+ // immediate loads, which we do not want to count for the loop
+ // vectorizer, since they are hopefully hoisted out of the loop. This
+  // would require a new parameter 'InLoop', but it is not clear whether
+  // constant args are common enough to motivate this.
+
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+
+ if (Ty->isVectorTy()) {
+ assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+ unsigned VF = Ty->getVectorNumElements();
+ unsigned NumVectors = getNumberOfParts(Ty);
+
+ // These vector operations are custom handled, but are still supported
+ // with one instruction per vector, regardless of element size.
+ if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
+ Opcode == Instruction::AShr) {
+ return NumVectors;
+ }
+
+ // These FP operations are supported with a single vector instruction for
+ // double (base implementation assumes float generally costs 2). For
+ // FP128, the scalar cost is 1, and there is no overhead since the values
+ // are already in scalar registers.
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
+ switch (ScalarBits) {
+ case 32: {
+        // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+        // FIXME: VF 2 for these FP operations is currently just as
+ // expensive as for VF 4.
+ if (VF == 2)
+ Cost *= 2;
+ return Cost;
+ }
+ case 64:
+ case 128:
+ return NumVectors;
+ default:
+ break;
+ }
+ }
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem) {
+ unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+ // FIXME: VF 2 for float is currently just as expensive as for VF 4.
+ if (VF == 2 && ScalarBits == 32)
+ Cost *= 2;
+ return Cost;
+ }
+ }
+ else { // Scalar:
+ // These FP operations are supported with a dedicated instruction for
+ // float, double and fp128 (base implementation assumes float generally
+ // costs 2).
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+ return 1;
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem)
+ return LIBCALL_COST;
+
+ if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
+ return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
+
+ // Or requires one instruction, although it has custom handling for i64.
+ if (Opcode == Instruction::Or)
+ return 1;
+
+ if (Opcode == Instruction::Xor && ScalarBits == 1)
+ // 2 * ipm sequences ; xor ; shift ; compare
+ return 7;
+
+ // An extra extension for narrow types is needed.
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
+ // sext of op(s) for narrow types
+ return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
+ // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
+ return (ScalarBits < 32 ? 4 : 2);
+ }
+
+ // Fallback to the default implementation.
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo, Args);
+}
+
+
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ assert (Tp->isVectorTy());
+  assert (ST->hasVector() && "getShuffleCost() called with vector type.");
+ unsigned NumVectors = getNumberOfParts(Tp);
+
+ // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+
+ // FP128 values are always in scalar registers, so there is no work
+ // involved with a shuffle, except for broadcast. In that case register
+ // moves are done with a single instruction per element.
+ if (Tp->getScalarType()->isFP128Ty())
+ return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+
+ switch (Kind) {
+ case TargetTransformInfo::SK_ExtractSubvector:
+ // ExtractSubvector Index indicates start offset.
+
+ // Extracting a subvector from first index is a noop.
+ return (Index == 0 ? 0 : NumVectors);
+
+ case TargetTransformInfo::SK_Broadcast:
+ // Loop vectorizer calls here to figure out the extra cost of
+ // broadcasting a loaded value to all elements of a vector. Since vlrep
+ // loads and replicates with a single instruction, adjust the returned
+ // value.
+ return NumVectors - 1;
+
+ default:
+
+ // SystemZ supports single instruction permutation / replication.
+ return NumVectors;
+ }
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+// Return the log2 difference of the element sizes of the two vector types.
+static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
+ unsigned Bits0 = Ty0->getScalarSizeInBits();
+ unsigned Bits1 = Ty1->getScalarSizeInBits();
+
+ if (Bits1 > Bits0)
+ return (Log2_32(Bits1) - Log2_32(Bits0));
+
+ return (Log2_32(Bits0) - Log2_32(Bits1));
+}
+
+// Return the number of instructions needed to truncate SrcTy to DstTy.
+unsigned SystemZTTIImpl::
+getVectorTruncCost(Type *SrcTy, Type *DstTy) {
+ assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
+ assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
+ "Packing must reduce size of vector type.");
+ assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
+ "Packing should not change number of elements.");
+
+ // TODO: Since fp32 is expanded, the extract cost should always be 0.
+
+ unsigned NumParts = getNumberOfParts(SrcTy);
+ if (NumParts <= 2)
+ // Up to 2 vector registers can be truncated efficiently with pack or
+ // permute. The latter requires an immediate mask to be loaded, which
+ // typically gets hoisted out of a loop. TODO: return a good value for
+ // BB-VECTORIZER that includes the immediate loads, which we do not want
+ // to count for the loop vectorizer.
+ return 1;
+
+ unsigned Cost = 0;
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ unsigned VF = SrcTy->getVectorNumElements();
+ for (unsigned P = 0; P < Log2Diff; ++P) {
+ if (NumParts > 1)
+ NumParts /= 2;
+ Cost += NumParts;
+ }
+
+ // Currently, a general mix of permutes and pack instructions is output by
+  // isel, which follows the cost computation above except for this case,
+  // which is one instruction less:
+ if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 &&
+ DstTy->getScalarSizeInBits() == 8)
+ Cost--;
+
+ return Cost;
+}
+
+// Return the cost of converting a vector bitmask produced by a compare
+// (SrcTy), to the type of the select or extend instruction (DstTy).
+unsigned SystemZTTIImpl::
+getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+ assert (SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+ "Should only be called with vector types.");
+
+ unsigned PackCost = 0;
+ unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+ unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ if (SrcScalarBits > DstScalarBits)
+ // The bitmask will be truncated.
+ PackCost = getVectorTruncCost(SrcTy, DstTy);
+ else if (SrcScalarBits < DstScalarBits) {
+ unsigned DstNumParts = getNumberOfParts(DstTy);
+ // Each vector select needs its part of the bitmask unpacked.
+ PackCost = Log2Diff * DstNumParts;
+ // Extra cost for moving part of mask before unpacking.
+ PackCost += DstNumParts - 1;
+ }
+
+ return PackCost;
+}
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+ Type *OpTy = nullptr;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+ OpTy = CI->getOperand(0)->getType();
+ else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+ if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+ if (isa<CmpInst>(LogicI->getOperand(1)))
+ OpTy = CI0->getOperand(0)->getType();
+
+ if (OpTy != nullptr) {
+ if (VF == 1) {
+ assert (!OpTy->isVectorTy() && "Expected scalar type");
+ return OpTy;
+ }
+ // Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
+    // be either scalar or already vectorized with the same or a lesser VF.
+ Type *ElTy = OpTy->getScalarType();
+ return VectorType::get(ElTy, VF);
+ }
+
+ return nullptr;
+}
+
+int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
+ unsigned DstScalarBits = Dst->getScalarSizeInBits();
+ unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+ if (Src->isVectorTy()) {
+ assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
+ assert (Dst->isVectorTy());
+ unsigned VF = Src->getVectorNumElements();
+ unsigned NumDstVectors = getNumberOfParts(Dst);
+ unsigned NumSrcVectors = getNumberOfParts(Src);
+
+ if (Opcode == Instruction::Trunc) {
+ if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
+ return 0; // Check for NOOP conversions.
+ return getVectorTruncCost(Src, Dst);
+ }
+
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (SrcScalarBits >= 8) {
+ // ZExt/SExt will be handled with one unpack per doubling of width.
+ unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
+
+        // For types that span multiple vector registers, some additional
+ // instructions are used to setup the unpacking.
+ unsigned NumSrcVectorOps =
+ (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
+ : (NumDstVectors / 2));
+
+ return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
+ }
+ else if (SrcScalarBits == 1) {
+ // This should be extension of a compare i1 result.
+        // If we know the widths of the compared operands, get the cost of
+        // converting the result to Dst. Otherwise assume the same widths.
+ unsigned Cost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+ if (Opcode == Instruction::ZExt)
+ // One 'vn' per dst vector with an immediate mask.
+ Cost += NumDstVectors;
+ return Cost;
+ }
+ }
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
+ Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
+      // TODO: Fix the base implementation, which could simplify things here
+      // (it does not differentiate between scalar and vector types).
+
+ // Only 64 bit vector conversions are natively supported.
+ if (SrcScalarBits == 64 && DstScalarBits == 64)
+ return NumDstVectors;
+
+      // Return the cost of multiple scalar invocations plus the cost of
+      // inserting and extracting the values. The base implementation does not
+ // realize float->int gets scalarized.
+ unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+ unsigned TotCost = VF * ScalarCost;
+ bool NeedsInserts = true, NeedsExtracts = true;
+ // FP128 registers do not get inserted or extracted.
+ if (DstScalarBits == 128 &&
+ (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
+ NeedsInserts = false;
+ if (SrcScalarBits == 128 &&
+ (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
+ NeedsExtracts = false;
+
+ TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+
+ // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
+ if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
+ TotCost *= 2;
+
+ return TotCost;
+ }
+
+ if (Opcode == Instruction::FPTrunc) {
+ if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
+ return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+ else // double -> float
+ return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
+ }
+
+ if (Opcode == Instruction::FPExt) {
+ if (SrcScalarBits == 32 && DstScalarBits == 64) {
+ // float -> double is very rare and currently unoptimized. Instead of
+ // using vldeb, which can do two at a time, all conversions are
+ // scalarized.
+ return VF * 2;
+ }
+ // -> fp128. VF * lxdb/lxeb + extraction of elements.
+ return VF + getScalarizationOverhead(Src, false, true);
+ }
+ }
+ else { // Scalar
+ assert (!Dst->isVectorTy());
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
+ return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ Src->isIntegerTy(1)) {
+ // This should be extension of a compare i1 result, which is done with
+ // ipm and a varying sequence of instructions.
+ unsigned Cost = 0;
+ if (Opcode == Instruction::SExt)
+ Cost = (DstScalarBits < 64 ? 3 : 4);
+ if (Opcode == Instruction::ZExt)
+ Cost = 3;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type were compared, this costs +1.
+ Cost++;
+
+ return Cost;
+ }
+ }
+
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+}
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
+ if (ValTy->isVectorTy()) {
+ assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
+ assert (CondTy == nullptr || CondTy->isVectorTy());
+ unsigned VF = ValTy->getVectorNumElements();
+
+ // Called with a compare instruction.
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+ unsigned PredicateExtraCost = 0;
+ if (I != nullptr) {
+ // Some predicates cost one or two extra instructions.
+ switch (dyn_cast<CmpInst>(I)->getPredicate()) {
+ case CmpInst::Predicate::ICMP_NE:
+ case CmpInst::Predicate::ICMP_UGE:
+ case CmpInst::Predicate::ICMP_ULE:
+ case CmpInst::Predicate::ICMP_SGE:
+ case CmpInst::Predicate::ICMP_SLE:
+ PredicateExtraCost = 1;
+ break;
+ case CmpInst::Predicate::FCMP_ONE:
+ case CmpInst::Predicate::FCMP_ORD:
+ case CmpInst::Predicate::FCMP_UEQ:
+ case CmpInst::Predicate::FCMP_UNO:
+ PredicateExtraCost = 2;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
+      // floats. FIXME: <2 x float> generates the same code as <4 x float>.
+ unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
+ unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+
+ unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
+ return Cost;
+ }
+ else { // Called with a select instruction.
+ assert (Opcode == Instruction::Select);
+
+ // We can figure out the extra cost of packing / unpacking if the
+ // instruction was passed and the compare instruction is found.
+ unsigned PackCost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ PackCost =
+ getVectorBitmaskConversionCost(CmpOpTy, ValTy);
+
+ return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+ }
+ }
+ else { // Scalar
+ switch (Opcode) {
+ case Instruction::ICmp: {
+ unsigned Cost = 1;
+ if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+ Cost += 2; // extend both operands
+ return Cost;
+ }
+ case Instruction::Select:
+ if (ValTy->isFloatingPointTy())
+ return 4; // No load on condition for FP, so this costs a conditional jump.
+ return 1; // Load On Condition.
+ }
+ }
+
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+}
+
+int SystemZTTIImpl::
+getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ // vlvgp will insert two grs into a vector register, so only count half the
+ // number of instructions.
+ if (Opcode == Instruction::InsertElement &&
+ Val->getScalarType()->isIntegerTy(64))
+ return ((Index % 2 == 0) ? 1 : 0);
+
+ if (Opcode == Instruction::ExtractElement) {
+ int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+
+ // Give a slight penalty for moving out of vector pipeline to FXU unit.
+ if (Index == 0 && Val->getScalarType()->isIntegerTy())
+ Cost += 1;
+
+ return Cost;
+ }
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+}
+
+int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace,
+ const Instruction *I) {
+ assert(!Src->isVoidTy() && "Invalid type");
+
+ if (!Src->isVectorTy() && Opcode == Instruction::Load &&
+ I != nullptr && I->hasOneUse()) {
+ const Instruction *UserI = cast<Instruction>(*I->user_begin());
+ unsigned Bits = Src->getScalarSizeInBits();
+ bool FoldsLoad = false;
+ switch (UserI->getOpcode()) {
+ case Instruction::ICmp:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // This also makes sense for float operations, but disabled for now due
+ // to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+ FoldsLoad = (Bits == 32 || Bits == 64);
+ break;
+ }
+
+ if (FoldsLoad) {
+ assert (UserI->getNumOperands() == 2 &&
+ "Expected to only handle binops.");
+
+      // UserI can't fold two loads, so in that case only return 0 cost half
+      // of the time.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == I)
+ continue;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
+ if (LI->hasOneUse())
+ return i == 0;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ unsigned NumOps = getNumberOfParts(Src);
+
+ if (Src->getScalarSizeInBits() == 128)
+ // 128 bit scalars are held in a pair of two 64 bit registers.
+ NumOps *= 2;
+
+ return NumOps;
+}
+
+int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
+ (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
+ assert (WideBits > 0 && "Could not compute size of vector");
+ int NumWideParts =
+ ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+
+ // How many source vectors are handled to produce a vectorized operand?
+ int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
+ int NumSrcParts =
+ ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+
+ // A Load group may have gaps.
+ unsigned NumOperands =
+ ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+
+ // Each needed permute takes two vectors as input.
+ if (NumSrcParts > 1)
+ NumSrcParts--;
+ int NumPermutes = NumSrcParts * NumOperands;
+
+ // Cost of load/store operations and the permutations needed.
+ return NumWideParts + NumPermutes;
+}
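As a quick check of the interleaved-access cost added above, here is a hand-worked trace for one illustrative query (the type and factor below are made up, not taken from this patch): a load group with Factor = 2, Indices = {0, 1}, and wide type <8 x i32>.

    WideBits        = 8 * 32                       = 256
    NumWideParts    = 256 / 128                    = 2   (two 128-bit loads)
    NumElsPerVector = 8 / 2                        = 4
    NumSrcParts     = min(NumWideParts, 4)         = 2, then 1 after the decrement
    NumPermutes     = NumSrcParts * Indices.size() = 1 * 2 = 2
    cost            = NumWideParts + NumPermutes   = 4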
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index f7d2d827f11b..3766ed45b8c4 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -27,6 +27,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
const SystemZSubtarget *getST() const { return ST; }
const SystemZTargetLowering *getTLI() const { return TLI; }
+ unsigned const LIBCALL_COST = 30;
+
public:
explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -53,6 +55,32 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
+ bool supportsEfficientVectorElementLoadStore() { return true; }
+ bool enableInterleavedAccessVectorization() { return true; }
+
+ int getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
+ unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace, const Instruction *I = nullptr);
+
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
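The new overrides take effect through BasicTTIImplBase's CRTP dispatch rather than virtual functions: the base template casts this to the derived SystemZTTIImpl, so a member with a matching signature shadows the generic one. A minimal stand-alone sketch of that mechanism (hypothetical names, not LLVM API):

    #include <iostream>

    // Generic cost model: dispatches to the derived class via CRTP.
    template <typename DerivedT> struct GenericCostModel {
      int getOpCost(int /*Opcode*/) { return 2; } // conservative default
      int query(int Opcode) {
        return static_cast<DerivedT *>(this)->getOpCost(Opcode);
      }
    };

    // Target-specific model: its getOpCost shadows the generic one.
    struct TargetCostModel : GenericCostModel<TargetCostModel> {
      int getOpCost(int /*Opcode*/) { return 1; }
    };

    int main() {
      TargetCostModel TCM;
      std::cout << TCM.query(0) << '\n'; // prints 1, the target-specific cost
    }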
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 375f8511f7ad..50272fda56de 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -264,10 +264,7 @@ bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
// in discardable section
// FIXME: this isn't the right predicate, should be based on the MCSection
// for the function.
- if (F.isWeakForLinker())
- return true;
-
- return false;
+ return F.isWeakForLinker();
}
/// Given a mergable constant with the specified size and relocation
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 8a6d28490e8c..e8fe0a2b218e 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -74,10 +74,10 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
Options.X = DefaultOptions.X; \
} while (0)
- RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
RESET_OPTION(NoTrappingFPMath, "no-trapping-math");
StringRef Denormal =
@@ -156,8 +156,11 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
bool IsTLS = GV && GV->isThreadLocal();
bool IsAccessViaCopyRelocs =
Options.MCOptions.MCPIECopyRelocations && GV && isa<GlobalVariable>(GV);
- // Check if we can use copy relocations.
- if (!IsTLS && (RM == Reloc::Static || IsAccessViaCopyRelocs))
+ Triple::ArchType Arch = TT.getArch();
+ bool IsPPC =
+ Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::ppc64le;
+ // Check if we can use copy relocations. PowerPC has no copy relocations.
+ if (!IsTLS && !IsPPC && (RM == Reloc::Static || IsAccessViaCopyRelocs))
return true;
}
@@ -198,7 +201,7 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const { return OptLevel; }
void TargetMachine::setOptLevel(CodeGenOpt::Level Level) { OptLevel = Level; }
TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
+ return TargetIRAnalysis([](const Function &F) {
return TargetTransformInfo(F.getParent()->getDataLayout());
});
}
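The RESET_OPTION lines read per-function string attributes, so the newly handled "no-signed-zeros-fp-math" key can differ per function. A hedged sketch of the check the macro performs for that key (simplified; the real macro also handles a missing attribute by falling back to the module-level default):

    #include "llvm/IR/Function.h"

    // Returns true when the function carries "no-signed-zeros-fp-math"="true".
    static bool readNoSignedZeros(const llvm::Function &F) {
      return F.getFnAttribute("no-signed-zeros-fp-math").getValueAsString() ==
             "true";
    }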
diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt
index d9c53ecc8d08..78b2cdb61b76 100644
--- a/lib/Target/WebAssembly/CMakeLists.txt
+++ b/lib/Target/WebAssembly/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(WebAssemblyCodeGen
WebAssemblyAsmPrinter.cpp
WebAssemblyCallIndirectFixup.cpp
WebAssemblyCFGStackify.cpp
+ WebAssemblyCFGSort.cpp
WebAssemblyExplicitLocals.cpp
WebAssemblyFastISel.cpp
WebAssemblyFixIrreducibleControlFlow.cpp
@@ -35,6 +36,7 @@ add_llvm_target(WebAssemblyCodeGen
WebAssemblyRegNumbering.cpp
WebAssemblyRegStackify.cpp
WebAssemblyReplacePhysRegs.cpp
+ WebAssemblyRuntimeLibcallSignatures.cpp
WebAssemblySelectionDAGInfo.cpp
WebAssemblySetP2AlignOperands.cpp
WebAssemblyStoreResults.cpp
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index b4763ca60ab6..b5f53114d3e1 100644
--- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -63,89 +63,8 @@ extern "C" void LLVMInitializeWebAssemblyDisassembler() {
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
raw_ostream &OS, raw_ostream &CS) const {
- Size = 0;
- uint64_t Pos = 0;
- // Read the opcode.
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Opcode = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
+ // TODO: Implement disassembly.
- if (Opcode >= WebAssembly::INSTRUCTION_LIST_END)
- return MCDisassembler::Fail;
-
- MI.setOpcode(Opcode);
- const MCInstrDesc &Desc = MCII->get(Opcode);
- unsigned NumFixedOperands = Desc.NumOperands;
-
- // If it's variadic, read the number of extra operands.
- unsigned NumExtraOperands = 0;
- if (Desc.isVariadic()) {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- NumExtraOperands = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- }
-
- // Read the fixed operands. These are described by the MCInstrDesc.
- for (unsigned i = 0; i < NumFixedOperands; ++i) {
- const MCOperandInfo &Info = Desc.OpInfo[i];
- switch (Info.OperandType) {
- case MCOI::OPERAND_IMMEDIATE:
- case WebAssembly::OPERAND_LOCAL:
- case WebAssembly::OPERAND_P2ALIGN:
- case WebAssembly::OPERAND_BASIC_BLOCK: {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- MI.addOperand(MCOperand::createImm(Imm));
- break;
- }
- case MCOI::OPERAND_REGISTER: {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- MI.addOperand(MCOperand::createReg(Reg));
- break;
- }
- case WebAssembly::OPERAND_F32IMM:
- case WebAssembly::OPERAND_F64IMM: {
- // TODO: MC converts all floating point immediate operands to double.
- // This is fine for numeric values, but may cause NaNs to change bits.
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Bits = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- double Imm;
- memcpy(&Imm, &Bits, sizeof(Imm));
- MI.addOperand(MCOperand::createFPImm(Imm));
- break;
- }
- default:
- llvm_unreachable("unimplemented operand kind");
- }
- }
-
- // Read the extra operands.
- assert(NumExtraOperands == 0 || Desc.isVariadic());
- for (unsigned i = 0; i < NumExtraOperands; ++i) {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- if (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate) {
- // Decode extra immediate operands.
- uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
- MI.addOperand(MCOperand::createImm(Imm));
- } else {
- // Decode extra register operands.
- uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
- MI.addOperand(MCOperand::createReg(Reg));
- }
- Pos += sizeof(uint64_t);
- }
-
- Size = Pos;
- return MCDisassembler::Success;
+ return MCDisassembler::Fail;
}
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 0af13cffdb04..f31dde0ce48f 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -242,3 +242,17 @@ const char *llvm::WebAssembly::TypeToString(MVT Ty) {
llvm_unreachable("unsupported type");
}
}
+
+const char *llvm::WebAssembly::TypeToString(wasm::ValType Type) {
+ switch (Type) {
+ case wasm::ValType::I32:
+ return "i32";
+ case wasm::ValType::I64:
+ return "i64";
+ case wasm::ValType::F32:
+ return "f32";
+ case wasm::ValType::F64:
+ return "f64";
+ }
+ llvm_unreachable("unsupported type");
+}
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index d11f99c1ff39..c6158720d62f 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Wasm.h"
namespace llvm {
@@ -50,6 +51,7 @@ public:
namespace WebAssembly {
const char *TypeToString(MVT Ty);
+const char *TypeToString(wasm::ValType Type);
} // end namespace WebAssembly
diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
index fd41df7b9635..13c0fe915908 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
@@ -5,4 +5,5 @@ add_llvm_library(LLVMWebAssemblyDesc
WebAssemblyMCCodeEmitter.cpp
WebAssemblyMCTargetDesc.cpp
WebAssemblyTargetStreamer.cpp
+ WebAssemblyWasmObjectWriter.cpp
)
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 97454a824a34..7c78285fbda4 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyFixupKinds.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
@@ -22,21 +23,22 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCWasmObjectWriter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
-class WebAssemblyAsmBackend final : public MCAsmBackend {
+class WebAssemblyAsmBackendELF final : public MCAsmBackend {
bool Is64Bit;
public:
- explicit WebAssemblyAsmBackend(bool Is64Bit)
+ explicit WebAssemblyAsmBackendELF(bool Is64Bit)
: MCAsmBackend(), Is64Bit(Is64Bit) {}
- ~WebAssemblyAsmBackend() override {}
+ ~WebAssemblyAsmBackendELF() override {}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -61,6 +63,95 @@ public:
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
};
+class WebAssemblyAsmBackend final : public MCAsmBackend {
+ bool Is64Bit;
+
+public:
+ explicit WebAssemblyAsmBackend(bool Is64Bit)
+ : MCAsmBackend(), Is64Bit(Is64Bit) {}
+ ~WebAssemblyAsmBackend() override {}
+
+ unsigned getNumFixupKinds() const override {
+ return WebAssembly::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+
+ // No instruction requires relaxation
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {}
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+};
+
+bool WebAssemblyAsmBackendELF::writeNopData(uint64_t Count,
+ MCObjectWriter *OW) const {
+ for (uint64_t i = 0; i < Count; ++i)
+ OW->write8(WebAssembly::Nop);
+
+ return true;
+}
+
+void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel, MCContext &Ctx) const {
+ const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
+ assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags");
+
+ unsigned NumBytes = alignTo(Info.TargetSize, 8) / 8;
+ if (Value == 0)
+ return; // Doesn't change encoding.
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+MCObjectWriter *
+WebAssemblyAsmBackendELF::createObjectWriter(raw_pwrite_stream &OS) const {
+ return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0);
+}
+
+const MCFixupKindInfo &
+WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[WebAssembly::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // WebAssemblyFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ { "fixup_code_sleb128_i32", 0, 5*8, 0 },
+ { "fixup_code_sleb128_i64", 0, 10*8, 0 },
+ { "fixup_code_uleb128_i32", 0, 5*8, 0 },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
MCObjectWriter *OW) const {
if (Count == 0)
@@ -74,11 +165,11 @@ bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags");
- unsigned NumBytes = (Info.TargetSize + 7) / 8;
+ unsigned NumBytes = alignTo(Info.TargetSize, 8) / 8;
if (Value == 0)
return; // Doesn't change encoding.
@@ -96,10 +187,12 @@ void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
MCObjectWriter *
WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
- return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0);
+ return createWebAssemblyWasmObjectWriter(OS, Is64Bit);
}
} // end anonymous namespace
MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) {
+ if (TT.isOSBinFormatELF())
+ return new WebAssemblyAsmBackendELF(TT.isArch64Bit());
return new WebAssemblyAsmBackend(TT.isArch64Bit());
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
new file mode 100644
index 000000000000..b0af63c924bd
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -0,0 +1,31 @@
+//=- WebAssemblyFixupKinds.h - WebAssembly Specific Fixup Entries -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYFIXUPKINDS_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace WebAssembly {
+enum Fixups {
+ fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
+ fixup_code_sleb128_i64, // 64-bit signed
+ fixup_code_uleb128_i32, // 32-bit unsigned
+
+ fixup_code_global_index, // 32-bit unsigned
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace WebAssembly
+} // end namespace llvm
+
+#endif
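The 5-byte and 10-byte widths the asm backend registers for these fixup kinds (the 5*8 and 10*8 bit sizes above) follow from LEB128 carrying 7 payload bits per byte; a stand-alone compile-time check of that arithmetic:

    // ceil(Bits / 7) bytes are needed to LEB128-encode a Bits-wide value.
    constexpr unsigned lebBytes(unsigned Bits) { return (Bits + 6) / 7; }
    static_assert(lebBytes(32) == 5, "i32 (s/u)leb128 fields pad to 5 bytes");
    static_assert(lebBytes(64) == 10, "i64 sleb128 fields pad to 10 bytes");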
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index d8c39216c53b..2dcec5263fa1 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -19,9 +19,9 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mc-asm-info"
-WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+WebAssemblyMCAsmInfoELF::~WebAssemblyMCAsmInfoELF() {}
-WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
+WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) {
PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
@@ -51,3 +51,33 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
// WebAssembly's stack is never executable.
UsesNonexecutableStackSection = false;
}
+
+WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+
+WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
+ PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
+
+ // TODO: What should MaxInstLength be?
+
+ UseDataRegionDirectives = true;
+
+ // Use .skip instead of .zero because .zero is confusing when used with two
+ // arguments (it doesn't actually zero things out).
+ ZeroDirective = "\t.skip\t";
+
+ Data8bitsDirective = "\t.int8\t";
+ Data16bitsDirective = "\t.int16\t";
+ Data32bitsDirective = "\t.int32\t";
+ Data64bitsDirective = "\t.int64\t";
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveAlignmentIsInBytes = false;
+ LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
+
+ SupportsDebugInformation = true;
+
+ // For now, WebAssembly does not support exceptions.
+ ExceptionsType = ExceptionHandling::None;
+
+ // TODO: UseIntegratedAssembler?
+}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
index 2dcf2cd3c892..d9547096190e 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -16,12 +16,19 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoWasm.h"
namespace llvm {
class Triple;
-class WebAssemblyMCAsmInfo final : public MCAsmInfoELF {
+class WebAssemblyMCAsmInfoELF final : public MCAsmInfoELF {
+public:
+ explicit WebAssemblyMCAsmInfoELF(const Triple &T);
+ ~WebAssemblyMCAsmInfoELF() override;
+};
+
+class WebAssemblyMCAsmInfo final : public MCAsmInfoWasm {
public:
explicit WebAssemblyMCAsmInfo(const Triple &T);
~WebAssemblyMCAsmInfo() override;
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index d0e0eecd3002..a0b008947491 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyFixupKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -35,6 +36,7 @@ STATISTIC(MCNumFixups, "Number of MC fixups created.");
namespace {
class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCInstrInfo &MCII;
+ MCContext &Ctx;
// Implementation generated by tablegen.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -46,12 +48,14 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCSubtargetInfo &STI) const override;
public:
- explicit WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {}
};
} // end anonymous namespace
-MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) {
- return new WebAssemblyMCCodeEmitter(MCII);
+MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx) {
+ return new WebAssemblyMCCodeEmitter(MCII, Ctx);
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
@@ -63,6 +67,13 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
assert(Binary < UINT8_MAX && "Multi-byte opcodes not supported yet");
OS << uint8_t(Binary);
+ // For br_table instructions, encode the size of the table. In the MCInst,
+ // there's an index operand, one operand for each table entry, and the
+ // default operand.
+ if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 ||
+ MI.getOpcode() == WebAssembly::BR_TABLE_I64)
+ encodeULEB128(MI.getNumOperands() - 2, OS);
+
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
const MCOperand &MO = MI.getOperand(i);
@@ -77,6 +88,12 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
encodeSLEB128(int32_t(MO.getImm()), OS);
} else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
encodeSLEB128(int64_t(MO.getImm()), OS);
+ } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
+ Fixups.push_back(MCFixup::create(
+ OS.tell() - Start, MCConstantExpr::create(MO.getImm(), Ctx),
+ MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc()));
+ ++MCNumFixups;
+ encodeULEB128(uint64_t(MO.getImm()), OS);
} else {
encodeULEB128(uint64_t(MO.getImm()), OS);
}
@@ -102,14 +119,28 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
support::endian::Writer<support::little>(OS).write<double>(d);
}
} else if (MO.isExpr()) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ llvm::MCFixupKind FixupKind;
+ size_t PaddedSize;
+ if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32);
+ PaddedSize = 5;
+ } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64);
+ PaddedSize = 10;
+ } else if (Info.OperandType == WebAssembly::OPERAND_FUNCTION32 ||
+ Info.OperandType == WebAssembly::OPERAND_OFFSET32 ||
+ Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
+ FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
+ PaddedSize = 5;
+ } else {
+ llvm_unreachable("unexpected symbolic operand kind");
+ }
Fixups.push_back(MCFixup::create(
OS.tell() - Start, MO.getExpr(),
- STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4,
- MI.getLoc()));
+ FixupKind, MI.getLoc()));
++MCNumFixups;
- encodeULEB128(STI.getTargetTriple().isArch64Bit() ? UINT64_MAX
- : uint64_t(UINT32_MAX),
- OS);
+ encodeULEB128(0, OS, PaddedSize - 1);
} else {
llvm_unreachable("unexpected operand kind");
}
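For symbolic operands the emitter above now writes a fixed-width LEB128 placeholder (encodeULEB128(0, OS, PaddedSize - 1)) so the fixup recorded just before it can be patched in place. Assuming the usual padding convention of trailing continuation bytes, the 5-byte placeholder is 0x80 0x80 0x80 0x80 0x00, which still decodes to zero; a minimal decoder sketch for reference:

    #include <cstdint>

    // Decode one ULEB128 value starting at P; padded encodings such as
    // {0x80, 0x80, 0x80, 0x80, 0x00} decode to the same value (here 0).
    static uint64_t decodeULEB128(const uint8_t *P) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      uint8_t Byte;
      do {
        Byte = *P++;
        Value |= uint64_t(Byte & 0x7f) << Shift;
        Shift += 7;
      } while (Byte & 0x80);
      return Value;
    }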
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 3dc1ded17116..9fd3ec81c258 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -36,6 +36,8 @@ using namespace llvm;
static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
const Triple &TT) {
+ if (TT.isOSBinFormatELF())
+ return new WebAssemblyMCAsmInfoELF(TT);
return new WebAssemblyMCAsmInfo(TT);
}
@@ -71,8 +73,8 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/,
static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo & /*MRI*/,
- MCContext & /*Ctx*/) {
- return createWebAssemblyMCCodeEmitter(MCII);
+ MCContext &Ctx) {
+ return createWebAssemblyMCCodeEmitter(MCII, Ctx);
}
static MCAsmBackend *createAsmBackend(const Target & /*T*/,
@@ -88,8 +90,12 @@ static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,
}
static MCTargetStreamer *
-createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo & /*STI*/) {
- return new WebAssemblyTargetELFStreamer(S);
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new WebAssemblyTargetELFStreamer(S);
+
+ return new WebAssemblyTargetWasmStreamer(S);
}
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
@@ -135,12 +141,12 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
}
}
-WebAssembly::ValType WebAssembly::toValType(const MVT &Ty) {
+wasm::ValType WebAssembly::toValType(const MVT &Ty) {
switch (Ty.SimpleTy) {
- case MVT::i32: return WebAssembly::ValType::I32;
- case MVT::i64: return WebAssembly::ValType::I64;
- case MVT::f32: return WebAssembly::ValType::F32;
- case MVT::f64: return WebAssembly::ValType::F64;
+ case MVT::i32: return wasm::ValType::I32;
+ case MVT::i64: return wasm::ValType::I64;
+ case MVT::f32: return wasm::ValType::F32;
+ case MVT::f64: return wasm::ValType::F64;
default: llvm_unreachable("unexpected type");
}
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 8583b772deab..795658ca96b4 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -17,6 +17,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Wasm.h"
namespace llvm {
@@ -34,19 +35,25 @@ class raw_pwrite_stream;
Target &getTheWebAssemblyTarget32();
Target &getTheWebAssemblyTarget64();
-MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
+MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx);
MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit, uint8_t OSABI);
+MCObjectWriter *createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
+
namespace WebAssembly {
enum OperandType {
/// Basic block label in a branch construct.
OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET,
/// Local index.
OPERAND_LOCAL,
+ /// Global index.
+ OPERAND_GLOBAL,
/// 32-bit integer immediates.
OPERAND_I32IMM,
/// 64-bit integer immediates.
@@ -62,7 +69,9 @@ enum OperandType {
/// p2align immediate for load and store address alignment.
OPERAND_P2ALIGN,
/// signature immediate for block/loop.
- OPERAND_SIGNATURE
+ OPERAND_SIGNATURE,
+ /// type signature immediate for call_indirect.
+ OPERAND_TYPEINDEX,
};
} // end namespace WebAssembly
@@ -141,40 +150,25 @@ static const unsigned StoreP2AlignOperandNo = 0;
/// This is used to indicate block signatures.
enum class ExprType {
- Void = 0x40,
- I32 = 0x7f,
- I64 = 0x7e,
- F32 = 0x7d,
- F64 = 0x7c,
- I8x16 = 0x7b,
- I16x8 = 0x7a,
- I32x4 = 0x79,
- F32x4 = 0x78,
- B8x16 = 0x77,
- B16x8 = 0x76,
- B32x4 = 0x75
-};
-
-/// This is used to indicate local types.
-enum class ValType {
- I32 = 0x7f,
- I64 = 0x7e,
- F32 = 0x7d,
- F64 = 0x7c,
- I8x16 = 0x7b,
- I16x8 = 0x7a,
- I32x4 = 0x79,
- F32x4 = 0x78,
- B8x16 = 0x77,
- B16x8 = 0x76,
- B32x4 = 0x75
+ Void = -0x40,
+ I32 = -0x01,
+ I64 = -0x02,
+ F32 = -0x03,
+ F64 = -0x04,
+ I8x16 = -0x05,
+ I16x8 = -0x06,
+ I32x4 = -0x07,
+ F32x4 = -0x08,
+ B8x16 = -0x09,
+ B16x8 = -0x0a,
+ B32x4 = -0x0b
};
/// Instruction opcodes emitted via means other than CodeGen.
static const unsigned Nop = 0x01;
static const unsigned End = 0x0b;
-ValType toValType(const MVT &Ty);
+wasm::ValType toValType(const MVT &Ty);
} // end namespace WebAssembly
} // end namespace llvm
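The ExprType codes change from the old unsigned bytes to small negative values because these signatures are now emitted as signed LEB128 immediates (the SLEB128-based emitValueType added to the streamer below does the same for value types). For anything in the range [-64, 63] a single SLEB128 byte is just the low seven bits, so the emitted bytes stay what they were before:

    -0x01 (I32)  -> byte 0x7f      -0x02 (I64) -> byte 0x7e
    -0x03 (F32)  -> byte 0x7d      -0x04 (F64) -> byte 0x7c
    -0x40 (Void) -> byte 0x40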
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 3cee8b2a1844..ad59f2f40587 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -18,9 +18,11 @@
#include "WebAssemblyMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
@@ -28,6 +30,10 @@ using namespace llvm;
WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S) {}
+void WebAssemblyTargetStreamer::emitValueType(wasm::ValType Type) {
+ Streamer.EmitSLEB128IntValue(int32_t(Type));
+}
+
WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
MCStreamer &S, formatted_raw_ostream &OS)
: WebAssemblyTargetStreamer(S), OS(OS) {}
@@ -35,6 +41,9 @@ WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
WebAssemblyTargetELFStreamer::WebAssemblyTargetELFStreamer(MCStreamer &S)
: WebAssemblyTargetStreamer(S) {}
+WebAssemblyTargetWasmStreamer::WebAssemblyTargetWasmStreamer(MCStreamer &S)
+ : WebAssemblyTargetStreamer(S) {}
+
static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
bool First = true;
for (MVT Type : Types) {
@@ -47,14 +56,28 @@ static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
OS << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitParam(ArrayRef<MVT> Types) {
- OS << "\t.param \t";
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitParam(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ if (!Types.empty()) {
+ OS << "\t.param \t";
+
+ // FIXME: Currently this applies to the "current" function; it may
+ // be cleaner to specify an explicit symbol as part of the directive.
+
+ PrintTypes(OS, Types);
+ }
}
-void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) {
- OS << "\t.result \t";
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitResult(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ if (!Types.empty()) {
+ OS << "\t.result \t";
+
+ // FIXME: Currently this applies to the "current" function; it may
+ // be cleaner to specify an explicit symbol as part of the directive.
+
+ PrintTypes(OS, Types);
+ }
}
void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
@@ -64,6 +87,31 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
}
}
+void WebAssemblyTargetAsmStreamer::emitGlobal(
+ ArrayRef<wasm::Global> Globals) {
+ if (!Globals.empty()) {
+ OS << "\t.globalvar \t";
+
+ bool First = true;
+ for (const wasm::Global &G : Globals) {
+ if (First)
+ First = false;
+ else
+ OS << ", ";
+ OS << WebAssembly::TypeToString(G.Type);
+ if (!G.InitialModule.empty())
+ OS << '=' << G.InitialModule << ':' << G.InitialName;
+ else
+ OS << '=' << G.InitialValue;
+ }
+ OS << '\n';
+ }
+}
+
+void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t Index) {
+ OS << "\t.stack_pointer\t" << Index << '\n';
+}
+
void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType(
@@ -88,18 +136,30 @@ void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
OS << "\t.indidx \t" << *Value << '\n';
}
-void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) {
+void WebAssemblyTargetELFStreamer::emitParam(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
// Nothing to emit; params are declared as part of the function signature.
}
-void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) {
+void WebAssemblyTargetELFStreamer::emitResult(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
// Nothing to emit; results are declared as part of the function signature.
}
void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) {
Streamer.EmitULEB128IntValue(Types.size());
for (MVT Type : Types)
- Streamer.EmitIntValue(int64_t(WebAssembly::toValType(Type)), 1);
+ emitValueType(WebAssembly::toValType(Type));
+}
+
+void WebAssemblyTargetELFStreamer::emitGlobal(
+ ArrayRef<wasm::Global> Globals) {
+ llvm_unreachable(".globalvar encoding not yet implemented");
+}
+
+void WebAssemblyTargetELFStreamer::emitStackPointer(
+ uint32_t Index) {
+ llvm_unreachable(".stack_pointer encoding not yet implemented");
}
void WebAssemblyTargetELFStreamer::emitEndFunc() {
@@ -117,4 +177,88 @@ void WebAssemblyTargetELFStreamer::emitIndirectFunctionType(
}
void WebAssemblyTargetELFStreamer::emitGlobalImport(StringRef name) {
-} \ No newline at end of file
+}
+
+void WebAssemblyTargetWasmStreamer::emitParam(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ SmallVector<wasm::ValType, 4> Params;
+ for (MVT Ty : Types)
+ Params.push_back(WebAssembly::toValType(Ty));
+
+ cast<MCSymbolWasm>(Symbol)->setParams(std::move(Params));
+}
+
+void WebAssemblyTargetWasmStreamer::emitResult(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ SmallVector<wasm::ValType, 4> Returns;
+ for (MVT Ty : Types)
+ Returns.push_back(WebAssembly::toValType(Ty));
+
+ cast<MCSymbolWasm>(Symbol)->setReturns(std::move(Returns));
+}
+
+void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
+ SmallVector<std::pair<MVT, uint32_t>, 4> Grouped;
+ for (MVT Type : Types) {
+ if (Grouped.empty() || Grouped.back().first != Type)
+ Grouped.push_back(std::make_pair(Type, 1));
+ else
+ ++Grouped.back().second;
+ }
+
+ Streamer.EmitULEB128IntValue(Grouped.size());
+ for (auto Pair : Grouped) {
+ Streamer.EmitULEB128IntValue(Pair.second);
+ emitValueType(WebAssembly::toValType(Pair.first));
+ }
+}
+
+void WebAssemblyTargetWasmStreamer::emitGlobal(
+ ArrayRef<wasm::Global> Globals) {
+  // Encode the globals used by the function into the special .global_variables
+ // section. This will later be decoded and turned into contents for the
+ // Globals Section.
+ Streamer.PushSection();
+ Streamer.SwitchSection(Streamer.getContext()
+ .getWasmSection(".global_variables", 0, 0));
+ for (const wasm::Global &G : Globals) {
+ Streamer.EmitIntValue(int32_t(G.Type), 1);
+ Streamer.EmitIntValue(G.Mutable, 1);
+ if (G.InitialModule.empty()) {
+ Streamer.EmitIntValue(0, 1); // indicate that we have an int value
+ Streamer.EmitSLEB128IntValue(0);
+ } else {
+ Streamer.EmitIntValue(1, 1); // indicate that we have a module import
+ Streamer.EmitBytes(G.InitialModule);
+ Streamer.EmitIntValue(0, 1); // nul-terminate
+ Streamer.EmitBytes(G.InitialName);
+ Streamer.EmitIntValue(0, 1); // nul-terminate
+ }
+ }
+ Streamer.PopSection();
+}
+
+void WebAssemblyTargetWasmStreamer::emitStackPointer(uint32_t Index) {
+ Streamer.PushSection();
+ Streamer.SwitchSection(Streamer.getContext()
+ .getWasmSection(".stack_pointer", 0, 0));
+ Streamer.EmitIntValue(Index, 4);
+ Streamer.PopSection();
+}
+
+void WebAssemblyTargetWasmStreamer::emitEndFunc() {
+ llvm_unreachable(".end_func is not needed for direct wasm output");
+}
+
+void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) {
+ llvm_unreachable(".indidx encoding not yet implemented");
+}
+
+void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType(
+ StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
+ // Nothing to emit here. TODO: Re-design how linking works and re-evaluate
+ // whether it's necessary for .o files to declare indirect function types.
+}
+
+void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) {
+}
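A hand-worked trace of the run-length grouping in the new WebAssemblyTargetWasmStreamer::emitLocal above (the local list is illustrative):

    Types   = { i32, i32, f64, f64, f64 }
    Grouped = { (i32, 2), (f64, 3) }
    emitted : uleb128(2)                ; number of groups
              uleb128(2), valtype(i32)  ; two i32 locals
              uleb128(3), valtype(f64)  ; three f64 locals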
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 23ac3190243a..68d6747298df 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -18,10 +18,12 @@
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Wasm.h"
namespace llvm {
class MCELFStreamer;
+class MCWasmStreamer;
/// WebAssembly-specific streamer interface, to implement support
/// WebAssembly-specific assembly directives.
@@ -30,11 +32,15 @@ public:
explicit WebAssemblyTargetStreamer(MCStreamer &S);
/// .param
- virtual void emitParam(ArrayRef<MVT> Types) = 0;
+ virtual void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
/// .result
- virtual void emitResult(ArrayRef<MVT> Types) = 0;
+ virtual void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
/// .local
virtual void emitLocal(ArrayRef<MVT> Types) = 0;
+ /// .globalvar
+ virtual void emitGlobal(ArrayRef<wasm::Global> Globals) = 0;
+ /// .stack_pointer
+ virtual void emitStackPointer(uint32_t Index) = 0;
/// .endfunc
virtual void emitEndFunc() = 0;
/// .functype
@@ -47,6 +53,9 @@ public:
virtual void emitIndIdx(const MCExpr *Value) = 0;
/// .import_global
virtual void emitGlobalImport(StringRef name) = 0;
+
+protected:
+ void emitValueType(wasm::ValType Type);
};
/// This part is for ascii assembly output
@@ -56,9 +65,11 @@ class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
public:
WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- void emitParam(ArrayRef<MVT> Types) override;
- void emitResult(ArrayRef<MVT> Types) override;
+ void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
void emitLocal(ArrayRef<MVT> Types) override;
+ void emitGlobal(ArrayRef<wasm::Global> Globals) override;
+ void emitStackPointer(uint32_t Index) override;
void emitEndFunc() override;
void emitIndirectFunctionType(StringRef name,
SmallVectorImpl<MVT> &Params,
@@ -72,9 +83,29 @@ class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer {
public:
explicit WebAssemblyTargetELFStreamer(MCStreamer &S);
- void emitParam(ArrayRef<MVT> Types) override;
- void emitResult(ArrayRef<MVT> Types) override;
+ void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<MVT> Types) override;
+ void emitGlobal(ArrayRef<wasm::Global> Globals) override;
+ void emitStackPointer(uint32_t Index) override;
+ void emitEndFunc() override;
+ void emitIndirectFunctionType(StringRef name,
+ SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results) override;
+ void emitIndIdx(const MCExpr *Value) override;
+ void emitGlobalImport(StringRef name) override;
+};
+
+/// This part is for Wasm object output
+class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer {
+public:
+ explicit WebAssemblyTargetWasmStreamer(MCStreamer &S);
+
+ void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
void emitLocal(ArrayRef<MVT> Types) override;
+ void emitGlobal(ArrayRef<wasm::Global> Globals) override;
+ void emitStackPointer(uint32_t Index) override;
void emitEndFunc() override;
void emitIndirectFunctionType(StringRef name,
SmallVectorImpl<MVT> &Params,
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
new file mode 100644
index 000000000000..2846ec5e9337
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -0,0 +1,92 @@
+//===-- WebAssemblyWasmObjectWriter.cpp - WebAssembly Wasm Writer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file handles Wasm-specific object emission, converting LLVM's
+/// internal fixups into the appropriate relocations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MCWasmObjectWriter.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Wasm.h"
+using namespace llvm;
+
+namespace {
+class WebAssemblyWasmObjectWriter final : public MCWasmObjectTargetWriter {
+public:
+ explicit WebAssemblyWasmObjectWriter(bool Is64Bit);
+
+private:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+} // end anonymous namespace
+
+WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit)
+ : MCWasmObjectTargetWriter(Is64Bit) {}
+
+// Test whether the given expression computes a function address.
+static bool IsFunctionExpr(const MCExpr *Expr) {
+ if (const MCSymbolRefExpr *SyExp =
+ dyn_cast<MCSymbolRefExpr>(Expr))
+ return cast<MCSymbolWasm>(SyExp->getSymbol()).isFunction();
+
+ if (const MCBinaryExpr *BinOp =
+ dyn_cast<MCBinaryExpr>(Expr))
+ return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS());
+
+ if (const MCUnaryExpr *UnOp =
+ dyn_cast<MCUnaryExpr>(Expr))
+ return IsFunctionExpr(UnOp->getSubExpr());
+
+ return false;
+}
+
+unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ // WebAssembly functions are not allocated in the data address space. To
+ // resolve a pointer to a function, we must use a special relocation type.
+ bool IsFunction = IsFunctionExpr(Fixup.getValue());
+
+ assert(!IsPCRel);
+ switch (unsigned(Fixup.getKind())) {
+ case WebAssembly::fixup_code_sleb128_i32:
+ if (IsFunction)
+ return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
+ return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB;
+ case WebAssembly::fixup_code_sleb128_i64:
+ llvm_unreachable("fixup_sleb128_i64 not implemented yet");
+ case WebAssembly::fixup_code_uleb128_i32:
+ if (IsFunction)
+ return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
+ return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB;
+ case FK_Data_4:
+ if (IsFunction)
+ return wasm::R_WEBASSEMBLY_TABLE_INDEX_I32;
+ return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32;
+ case FK_Data_8:
+ llvm_unreachable("FK_Data_8 not implemented yet");
+ default:
+ llvm_unreachable("unimplemented fixup kind");
+ }
+}
+
+MCObjectWriter *llvm::createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit) {
+ MCWasmObjectTargetWriter *MOTW = new WebAssemblyWasmObjectWriter(Is64Bit);
+ return createWasmObjectWriter(MOTW, OS);
+}
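The key decision in getRelocType above is that WebAssembly function addresses are resolved through the indirect-call table rather than linear memory, so they need TABLE_INDEX/FUNCTION_INDEX relocations while data addresses use the GLOBAL_ADDR forms. A standalone model of that mapping (the Fixup enum is a simplified stand-in; the relocation names match those used in the file above):

  // reloc_choice.cpp - standalone model of the fixup -> relocation mapping
  // implemented by WebAssemblyWasmObjectWriter::getRelocType above.
  #include <cstdio>
  #include <string>

  enum class Fixup { SLEB128_I32, ULEB128_I32, Data4 }; // simplified kinds

  std::string relocFor(Fixup Kind, bool IsFunction) {
    switch (Kind) {
    case Fixup::SLEB128_I32:
      return IsFunction ? "R_WEBASSEMBLY_TABLE_INDEX_SLEB"
                        : "R_WEBASSEMBLY_GLOBAL_ADDR_SLEB";
    case Fixup::ULEB128_I32:
      return IsFunction ? "R_WEBASSEMBLY_FUNCTION_INDEX_LEB"
                        : "R_WEBASSEMBLY_GLOBAL_ADDR_LEB";
    case Fixup::Data4:
      return IsFunction ? "R_WEBASSEMBLY_TABLE_INDEX_I32"
                        : "R_WEBASSEMBLY_GLOBAL_ADDR_I32";
    }
    return "unknown";
  }

  int main() {
    // Taking the address of a function in a 32-bit data initializer.
    std::printf("%s\n", relocFor(Fixup::Data4, /*IsFunction=*/true).c_str());
    // Materializing the address of a data symbol (ULEB-encoded immediate).
    std::printf("%s\n",
                relocFor(Fixup::ULEB128_I32, /*IsFunction=*/false).c_str());
  }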
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
index 64991ad14071..3433b1553e8c 100644
--- a/lib/Target/WebAssembly/README.txt
+++ b/lib/Target/WebAssembly/README.txt
@@ -145,3 +145,24 @@ WebAssemblyRegStackify could be extended, or possibly rewritten, to take
advantage of the new opportunities.
//===---------------------------------------------------------------------===//
+
+Add support for mergeable sections in the Wasm writer, such as for strings and
+floating-point constants.
+
+//===---------------------------------------------------------------------===//
+
+The function @dynamic_alloca_redzone in test/CodeGen/WebAssembly/userstack.ll
+ends up with a tee_local in its prolog which has an unused result, requiring
+an extra drop:
+
+ get_global $push8=, 0
+ tee_local $push9=, 1, $pop8
+ drop $pop9
+ [...]
+
+The prologue code initially thinks it needs an FP register, but later it
+turns out to be unneeded, so one could either approach this by being more
+clever about not inserting code for an FP in the first place, or optimizing
+away the copy later.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h
index 8738263ad847..e04c4db19c8c 100644
--- a/lib/Target/WebAssembly/WebAssembly.h
+++ b/lib/Target/WebAssembly/WebAssembly.h
@@ -46,6 +46,7 @@ FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyExplicitLocals();
FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
+FunctionPass *createWebAssemblyCFGSort();
FunctionPass *createWebAssemblyCFGStackify();
FunctionPass *createWebAssemblyLowerBrUnless();
FunctionPass *createWebAssemblyRegNumbering();
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 5b4b82eb5603..d9c2dba5bace 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -14,6 +14,7 @@
///
//===----------------------------------------------------------------------===//
+#include "WebAssemblyAsmPrinter.h"
#include "InstPrinter/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
@@ -21,13 +22,14 @@
#include "WebAssemblyMCInstLower.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRegisterInfo.h"
-#include "WebAssemblySubtarget.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -38,56 +40,6 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-namespace {
-
-class WebAssemblyAsmPrinter final : public AsmPrinter {
- const MachineRegisterInfo *MRI;
- WebAssemblyFunctionInfo *MFI;
-
-public:
- WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {}
-
-private:
- StringRef getPassName() const override {
- return "WebAssembly Assembly Printer";
- }
-
- //===------------------------------------------------------------------===//
- // MachineFunctionPass Implementation.
- //===------------------------------------------------------------------===//
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- MRI = &MF.getRegInfo();
- MFI = MF.getInfo<WebAssemblyFunctionInfo>();
- return AsmPrinter::runOnMachineFunction(MF);
- }
-
- //===------------------------------------------------------------------===//
- // AsmPrinter Implementation.
- //===------------------------------------------------------------------===//
-
- void EmitEndOfAsmFile(Module &M) override;
- void EmitJumpTableInfo() override;
- void EmitConstantPool() override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
- void EmitInstruction(const MachineInstr *MI) override;
- const MCExpr *lowerConstant(const Constant *CV) override;
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
-
- MVT getRegType(unsigned RegNo) const;
- std::string regToString(const MachineOperand &MO);
- WebAssemblyTargetStreamer *getTargetStreamer();
-};
-
-} // end anonymous namespace
-
//===----------------------------------------------------------------------===//
// Helpers.
//===----------------------------------------------------------------------===//
@@ -135,9 +87,19 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
for (const auto &G : M.globals()) {
if (!G.hasInitializer() && G.hasExternalLinkage()) {
+ uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType());
getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier());
+ OutStreamer->emitELFSize(getSymbol(&G),
+ MCConstantExpr::create(Size, OutContext));
}
}
+
+ if (!TM.getTargetTriple().isOSBinFormatELF()) {
+ MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>();
+ getTargetStreamer()->emitGlobal(MMIW.getGlobals());
+ if (MMIW.hasStackPointerGlobal())
+ getTargetStreamer()->emitStackPointer(MMIW.getStackPointerGlobal());
+ }
}
void WebAssemblyAsmPrinter::EmitConstantPool() {
@@ -150,8 +112,7 @@ void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
}
void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
- if (!MFI->getParams().empty())
- getTargetStreamer()->emitParam(MFI->getParams());
+ getTargetStreamer()->emitParam(CurrentFnSym, MFI->getParams());
SmallVector<MVT, 4> ResultVTs;
const Function &F(*MF->getFunction());
@@ -169,23 +130,26 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
// If the return type needs to be legalized it will get converted into
// passing a pointer.
if (ResultVTs.size() == 1)
- getTargetStreamer()->emitResult(ResultVTs);
-
- // FIXME: When ExplicitLocals is enabled by default, we won't need
- // to define the locals here (and MFI can go back to being pointer-to-const).
- for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
- unsigned WAReg = MFI->getWAReg(VReg);
- // Don't declare unused registers.
- if (WAReg == WebAssemblyFunctionInfo::UnusedReg)
- continue;
- // Don't redeclare parameters.
- if (WAReg < MFI->getParams().size())
- continue;
- // Don't declare stackified registers.
- if (int(WAReg) < 0)
- continue;
- MFI->addLocal(getRegType(VReg));
+ getTargetStreamer()->emitResult(CurrentFnSym, ResultVTs);
+ else
+ getTargetStreamer()->emitResult(CurrentFnSym, ArrayRef<MVT>());
+
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ assert(MFI->getLocals().empty());
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
+ unsigned WAReg = MFI->getWAReg(VReg);
+ // Don't declare unused registers.
+ if (WAReg == WebAssemblyFunctionInfo::UnusedReg)
+ continue;
+ // Don't redeclare parameters.
+ if (WAReg < MFI->getParams().size())
+ continue;
+ // Don't declare stackified registers.
+ if (int(WAReg) < 0)
+ continue;
+ MFI->addLocal(getRegType(VReg));
+ }
}
getTargetStreamer()->emitLocal(MFI->getLocals());
@@ -194,7 +158,8 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
}
void WebAssemblyAsmPrinter::EmitFunctionBodyEnd() {
- getTargetStreamer()->emitEndFunc();
+ if (TM.getTargetTriple().isOSBinFormatELF())
+ getTargetStreamer()->emitEndFunc();
}
void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
new file mode 100644
index 000000000000..c8917b8d7e48
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -0,0 +1,77 @@
+// WebAssemblyAsmPrinter.h - WebAssembly implementation of AsmPrinter-*- C++ -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H
+
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCSymbol;
+class WebAssemblyFunctionInfo;
+class WebAssemblyTargetStreamer;
+class WebAssemblyMCInstLower;
+
+class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
+ const WebAssemblySubtarget *Subtarget;
+ const MachineRegisterInfo *MRI;
+ WebAssemblyFunctionInfo *MFI;
+
+public:
+ explicit WebAssemblyAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)),
+ Subtarget(nullptr), MRI(nullptr), MFI(nullptr) {}
+
+ StringRef getPassName() const override {
+ return "WebAssembly Assembly Printer";
+ }
+
+ const WebAssemblySubtarget &getSubtarget() const { return *Subtarget; }
+
+ //===------------------------------------------------------------------===//
+ // MachineFunctionPass Implementation.
+ //===------------------------------------------------------------------===//
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
+ MRI = &MF.getRegInfo();
+ MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+ return AsmPrinter::runOnMachineFunction(MF);
+ }
+
+ //===------------------------------------------------------------------===//
+ // AsmPrinter Implementation.
+ //===------------------------------------------------------------------===//
+
+ void EmitEndOfAsmFile(Module &M) override;
+ void EmitJumpTableInfo() override;
+ void EmitConstantPool() override;
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+ const MCExpr *lowerConstant(const Constant *CV) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+
+ MVT getRegType(unsigned RegNo) const;
+ std::string regToString(const MachineOperand &MO);
+ WebAssemblyTargetStreamer *getTargetStreamer();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
new file mode 100644
index 000000000000..40e1928197bc
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -0,0 +1,277 @@
+//===-- WebAssemblyCFGSort.cpp - CFG Sorting ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a CFG sorting pass.
+///
+/// This pass reorders the blocks in a function to put them into topological
+/// order, ignoring loop backedges, and without any loop being interrupted
+/// by a block not dominated by the loop header, with special care to keep the
+/// order as similar as possible to the original order.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-cfg-sort"
+
+namespace {
+class WebAssemblyCFGSort final : public MachineFunctionPass {
+ StringRef getPassName() const override { return "WebAssembly CFG Sort"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyCFGSort() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyCFGSort::ID = 0;
+FunctionPass *llvm::createWebAssemblyCFGSort() {
+ return new WebAssemblyCFGSort();
+}
+
+static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
+#ifndef NDEBUG
+ bool AnyBarrier = false;
+#endif
+ bool AllAnalyzable = true;
+ for (const MachineInstr &Term : MBB->terminators()) {
+#ifndef NDEBUG
+ AnyBarrier |= Term.isBarrier();
+#endif
+ AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch();
+ }
+ assert((AnyBarrier || AllAnalyzable) &&
+ "AnalyzeBranch needs to analyze any block with a fallthrough");
+ if (AllAnalyzable)
+ MBB->updateTerminator();
+}
+
+namespace {
+/// Sort blocks by their number.
+struct CompareBlockNumbers {
+ bool operator()(const MachineBasicBlock *A,
+ const MachineBasicBlock *B) const {
+ return A->getNumber() > B->getNumber();
+ }
+};
+/// Sort blocks by their number in the opposite order.
+struct CompareBlockNumbersBackwards {
+ bool operator()(const MachineBasicBlock *A,
+ const MachineBasicBlock *B) const {
+ return A->getNumber() < B->getNumber();
+ }
+};
+/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated
+/// by the loop header among the loop's blocks.
+struct Entry {
+ const MachineLoop *Loop;
+ unsigned NumBlocksLeft;
+
+ /// List of blocks not dominated by Loop's header that are deferred until
+ /// after all of Loop's blocks have been seen.
+ std::vector<MachineBasicBlock *> Deferred;
+
+ explicit Entry(const MachineLoop *L)
+ : Loop(L), NumBlocksLeft(L->getNumBlocks()) {}
+};
+} // end anonymous namespace
+
+/// Sort the blocks, taking special care to make sure that loops are not
+/// interrupted by blocks not dominated by their header.
+/// TODO: There are many opportunities for improving the heuristics here.
+/// Explore them.
+static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT) {
+ // Prepare for a topological sort: Record the number of predecessors each
+ // block has, ignoring loop backedges.
+ MF.RenumberBlocks();
+ SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
+ for (MachineBasicBlock &MBB : MF) {
+ unsigned N = MBB.pred_size();
+ if (MachineLoop *L = MLI.getLoopFor(&MBB))
+ if (L->getHeader() == &MBB)
+ for (const MachineBasicBlock *Pred : MBB.predecessors())
+ if (L->contains(Pred))
+ --N;
+ NumPredsLeft[MBB.getNumber()] = N;
+ }
+
+ // Topological sort the CFG, with additional constraints:
+ // - Between a loop header and the last block in the loop, there can be
+ // no blocks not dominated by the loop header.
+ // - It's desirable to preserve the original block order when possible.
+ // We use two ready lists; Preferred and Ready. Preferred has recently
+// processed successors, to help preserve block sequences from the original
+ // order. Ready has the remaining ready blocks.
+ PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
+ CompareBlockNumbers>
+ Preferred;
+ PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
+ CompareBlockNumbersBackwards>
+ Ready;
+ SmallVector<Entry, 4> Loops;
+ for (MachineBasicBlock *MBB = &MF.front();;) {
+ const MachineLoop *L = MLI.getLoopFor(MBB);
+ if (L) {
+ // If MBB is a loop header, add it to the active loop list. We can't put
+ // any blocks that it doesn't dominate until we see the end of the loop.
+ if (L->getHeader() == MBB)
+ Loops.push_back(Entry(L));
+ // For each active loop the block is in, decrement the count. If MBB is
+ // the last block in an active loop, take it off the list and pick up any
+ // blocks deferred because the header didn't dominate them.
+ for (Entry &E : Loops)
+ if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
+ for (auto DeferredBlock : E.Deferred)
+ Ready.push(DeferredBlock);
+ while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
+ Loops.pop_back();
+ }
+ // The main topological sort logic.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ // Ignore backedges.
+ if (MachineLoop *SuccL = MLI.getLoopFor(Succ))
+ if (SuccL->getHeader() == Succ && SuccL->contains(MBB))
+ continue;
+ // Decrement the predecessor count. If it's now zero, it's ready.
+ if (--NumPredsLeft[Succ->getNumber()] == 0)
+ Preferred.push(Succ);
+ }
+ // Determine the block to follow MBB. First try to find a preferred block,
+ // to preserve the original block order when possible.
+ MachineBasicBlock *Next = nullptr;
+ while (!Preferred.empty()) {
+ Next = Preferred.top();
+ Preferred.pop();
+ // If Next isn't dominated by the top active loop header, defer it until that
+ // loop is done.
+ if (!Loops.empty() &&
+ !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
+ Loops.back().Deferred.push_back(Next);
+ Next = nullptr;
+ continue;
+ }
+ // If Next was originally ordered before MBB, and it isn't because it was
+ // loop-rotated above the header, it's not preferred.
+ if (Next->getNumber() < MBB->getNumber() &&
+ (!L || !L->contains(Next) ||
+ L->getHeader()->getNumber() < Next->getNumber())) {
+ Ready.push(Next);
+ Next = nullptr;
+ continue;
+ }
+ break;
+ }
+ // If we didn't find a suitable block in the Preferred list, check the
+ // general Ready list.
+ if (!Next) {
+ // If there are no more blocks to process, we're done.
+ if (Ready.empty()) {
+ MaybeUpdateTerminator(MBB);
+ break;
+ }
+ for (;;) {
+ Next = Ready.top();
+ Ready.pop();
+ // If Next isn't dominated by the top active loop header, defer it until
+ // that loop is done.
+ if (!Loops.empty() &&
+ !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
+ Loops.back().Deferred.push_back(Next);
+ continue;
+ }
+ break;
+ }
+ }
+ // Move the next block into place and iterate.
+ Next->moveAfter(MBB);
+ MaybeUpdateTerminator(MBB);
+ MBB = Next;
+ }
+ assert(Loops.empty() && "Active loop list not finished");
+ MF.RenumberBlocks();
+
+#ifndef NDEBUG
+ SmallSetVector<MachineLoop *, 8> OnStack;
+
+ // Insert a sentinel representing the degenerate loop that starts at the
+ // function entry block and includes the entire function as a "loop" that
+ // executes once.
+ OnStack.insert(nullptr);
+
+ for (auto &MBB : MF) {
+ assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+
+ MachineLoop *Loop = MLI.getLoopFor(&MBB);
+ if (Loop && &MBB == Loop->getHeader()) {
+ // Loop header. The loop predecessor should be sorted above, and the other
+ // predecessors should be backedges below.
+ for (auto Pred : MBB.predecessors())
+ assert(
+ (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
+ "Loop header predecessors must be loop predecessors or backedges");
+ assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
+ } else {
+ // Not a loop header. All predecessors should be sorted above.
+ for (auto Pred : MBB.predecessors())
+ assert(Pred->getNumber() < MBB.getNumber() &&
+ "Non-loop-header predecessors should be topologically sorted");
+ assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
+ "Blocks must be nested in their loops");
+ }
+ while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
+ OnStack.pop_back();
+ }
+ assert(OnStack.pop_back_val() == nullptr &&
+ "The function entry block shouldn't actually be a loop header");
+ assert(OnStack.empty() &&
+ "Control flow stack pushes and pops should be balanced.");
+#endif
+}
+
+bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** CFG Sorting **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ // Liveness is not tracked for VALUE_STACK physreg.
+ MF.getRegInfo().invalidateLiveness();
+
+ // Sort the blocks, with contiguous loops.
+ SortBlocks(MF, MLI, MDT);
+
+ return true;
+}
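A toy illustration of the first step SortBlocks performs above: counting each block's predecessors while ignoring loop backedges, so a loop header becomes ready as soon as its predecessors outside the loop have been placed. The adjacency lists and loop membership below are invented for the example:

  // preds_ignoring_backedges.cpp - toy model of the predecessor counting in
  // SortBlocks above; backedges into a loop header are not counted.
  #include <cstdio>
  #include <vector>

  int main() {
    // Toy CFG, blocks 0..3: 0->1, 1->2, 2->1 (backedge), 2->3.
    // Block 1 is the header of the loop {1, 2}.
    std::vector<std::vector<int>> Preds = {{}, {0, 2}, {1}, {2}};
    std::vector<bool> IsLoopHeader = {false, true, false, false};
    std::vector<std::vector<int>> LoopBlocks = {{}, {1, 2}, {}, {}};

    std::vector<unsigned> NumPredsLeft(Preds.size());
    for (int MBB = 0; MBB != (int)Preds.size(); ++MBB) {
      unsigned N = Preds[MBB].size();
      if (IsLoopHeader[MBB])
        for (int Pred : Preds[MBB])
          for (int B : LoopBlocks[MBB])
            if (B == Pred) { --N; break; } // a backedge; don't count it
      NumPredsLeft[MBB] = N;
    }

    // Block 1 keeps a single "real" predecessor (block 0); the 2->1 backedge
    // is ignored so the sort can enter the loop through its header.
    for (unsigned I = 0; I < NumPredsLeft.size(); ++I)
      std::printf("block %u: %u preds\n", I, NumPredsLeft[I]);
  }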
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 49b9754e6b62..bd11d1b46906 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -10,12 +10,7 @@
/// \file
/// \brief This file implements a CFG stacking pass.
///
-/// This pass reorders the blocks in a function to put them into topological
-/// order, ignoring loop backedges, and without any loop being interrupted
-/// by a block not dominated by the loop header, with special care to keep the
-/// order as similar as possible to the original order.
-///
-/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since
+/// This pass inserts BLOCK and LOOP markers to mark the start of scopes, since
/// scope boundaries serve as the labels for WebAssembly's control transfers.
///
/// This is sufficient to convert arbitrary CFGs into a form that works on
@@ -28,8 +23,6 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
-#include "llvm/ADT/PriorityQueue.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -68,217 +61,6 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() {
return new WebAssemblyCFGStackify();
}
-/// Return the "bottom" block of a loop. This differs from
-/// MachineLoop::getBottomBlock in that it works even if the loop is
-/// discontiguous.
-static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) {
- MachineBasicBlock *Bottom = Loop->getHeader();
- for (MachineBasicBlock *MBB : Loop->blocks())
- if (MBB->getNumber() > Bottom->getNumber())
- Bottom = MBB;
- return Bottom;
-}
-
-static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
-#ifndef NDEBUG
- bool AnyBarrier = false;
-#endif
- bool AllAnalyzable = true;
- for (const MachineInstr &Term : MBB->terminators()) {
-#ifndef NDEBUG
- AnyBarrier |= Term.isBarrier();
-#endif
- AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch();
- }
- assert((AnyBarrier || AllAnalyzable) &&
- "AnalyzeBranch needs to analyze any block with a fallthrough");
- if (AllAnalyzable)
- MBB->updateTerminator();
-}
-
-namespace {
-/// Sort blocks by their number.
-struct CompareBlockNumbers {
- bool operator()(const MachineBasicBlock *A,
- const MachineBasicBlock *B) const {
- return A->getNumber() > B->getNumber();
- }
-};
-/// Sort blocks by their number in the opposite order..
-struct CompareBlockNumbersBackwards {
- bool operator()(const MachineBasicBlock *A,
- const MachineBasicBlock *B) const {
- return A->getNumber() < B->getNumber();
- }
-};
-/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated
-/// by the loop header among the loop's blocks.
-struct Entry {
- const MachineLoop *Loop;
- unsigned NumBlocksLeft;
-
- /// List of blocks not dominated by Loop's header that are deferred until
- /// after all of Loop's blocks have been seen.
- std::vector<MachineBasicBlock *> Deferred;
-
- explicit Entry(const MachineLoop *L)
- : Loop(L), NumBlocksLeft(L->getNumBlocks()) {}
-};
-}
-
-/// Sort the blocks, taking special care to make sure that loops are not
-/// interrupted by blocks not dominated by their header.
-/// TODO: There are many opportunities for improving the heuristics here.
-/// Explore them.
-static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
- const MachineDominatorTree &MDT) {
- // Prepare for a topological sort: Record the number of predecessors each
- // block has, ignoring loop backedges.
- MF.RenumberBlocks();
- SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
- for (MachineBasicBlock &MBB : MF) {
- unsigned N = MBB.pred_size();
- if (MachineLoop *L = MLI.getLoopFor(&MBB))
- if (L->getHeader() == &MBB)
- for (const MachineBasicBlock *Pred : MBB.predecessors())
- if (L->contains(Pred))
- --N;
- NumPredsLeft[MBB.getNumber()] = N;
- }
-
- // Topological sort the CFG, with additional constraints:
- // - Between a loop header and the last block in the loop, there can be
- // no blocks not dominated by the loop header.
- // - It's desirable to preserve the original block order when possible.
- // We use two ready lists; Preferred and Ready. Preferred has recently
- // processed sucessors, to help preserve block sequences from the original
- // order. Ready has the remaining ready blocks.
- PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
- CompareBlockNumbers>
- Preferred;
- PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
- CompareBlockNumbersBackwards>
- Ready;
- SmallVector<Entry, 4> Loops;
- for (MachineBasicBlock *MBB = &MF.front();;) {
- const MachineLoop *L = MLI.getLoopFor(MBB);
- if (L) {
- // If MBB is a loop header, add it to the active loop list. We can't put
- // any blocks that it doesn't dominate until we see the end of the loop.
- if (L->getHeader() == MBB)
- Loops.push_back(Entry(L));
- // For each active loop the block is in, decrement the count. If MBB is
- // the last block in an active loop, take it off the list and pick up any
- // blocks deferred because the header didn't dominate them.
- for (Entry &E : Loops)
- if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
- for (auto DeferredBlock : E.Deferred)
- Ready.push(DeferredBlock);
- while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
- Loops.pop_back();
- }
- // The main topological sort logic.
- for (MachineBasicBlock *Succ : MBB->successors()) {
- // Ignore backedges.
- if (MachineLoop *SuccL = MLI.getLoopFor(Succ))
- if (SuccL->getHeader() == Succ && SuccL->contains(MBB))
- continue;
- // Decrement the predecessor count. If it's now zero, it's ready.
- if (--NumPredsLeft[Succ->getNumber()] == 0)
- Preferred.push(Succ);
- }
- // Determine the block to follow MBB. First try to find a preferred block,
- // to preserve the original block order when possible.
- MachineBasicBlock *Next = nullptr;
- while (!Preferred.empty()) {
- Next = Preferred.top();
- Preferred.pop();
- // If X isn't dominated by the top active loop header, defer it until that
- // loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
- Next = nullptr;
- continue;
- }
- // If Next was originally ordered before MBB, and it isn't because it was
- // loop-rotated above the header, it's not preferred.
- if (Next->getNumber() < MBB->getNumber() &&
- (!L || !L->contains(Next) ||
- L->getHeader()->getNumber() < Next->getNumber())) {
- Ready.push(Next);
- Next = nullptr;
- continue;
- }
- break;
- }
- // If we didn't find a suitable block in the Preferred list, check the
- // general Ready list.
- if (!Next) {
- // If there are no more blocks to process, we're done.
- if (Ready.empty()) {
- MaybeUpdateTerminator(MBB);
- break;
- }
- for (;;) {
- Next = Ready.top();
- Ready.pop();
- // If Next isn't dominated by the top active loop header, defer it until
- // that loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
- continue;
- }
- break;
- }
- }
- // Move the next block into place and iterate.
- Next->moveAfter(MBB);
- MaybeUpdateTerminator(MBB);
- MBB = Next;
- }
- assert(Loops.empty() && "Active loop list not finished");
- MF.RenumberBlocks();
-
-#ifndef NDEBUG
- SmallSetVector<MachineLoop *, 8> OnStack;
-
- // Insert a sentinel representing the degenerate loop that starts at the
- // function entry block and includes the entire function as a "loop" that
- // executes once.
- OnStack.insert(nullptr);
-
- for (auto &MBB : MF) {
- assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
-
- MachineLoop *Loop = MLI.getLoopFor(&MBB);
- if (Loop && &MBB == Loop->getHeader()) {
- // Loop header. The loop predecessor should be sorted above, and the other
- // predecessors should be backedges below.
- for (auto Pred : MBB.predecessors())
- assert(
- (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
- "Loop header predecessors must be loop predecessors or backedges");
- assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
- } else {
- // Not a loop header. All predecessors should be sorted above.
- for (auto Pred : MBB.predecessors())
- assert(Pred->getNumber() < MBB.getNumber() &&
- "Non-loop-header predecessors should be topologically sorted");
- assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
- "Blocks must be nested in their loops");
- }
- while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
- OnStack.pop_back();
- }
- assert(OnStack.pop_back_val() == nullptr &&
- "The function entry block shouldn't actually be a loop header");
- assert(OnStack.empty() &&
- "Control flow stack pushes and pops should be balanced.");
-#endif
-}
-
/// Test whether Pred has any terminators explicitly branching to MBB, as
/// opposed to falling through. Note that it's possible (eg. in unoptimized
/// code) for a branch instruction to both branch to a block and fallthrough
@@ -488,6 +270,15 @@ static void FixEndsAtEndOfFunction(
}
}
+// WebAssembly functions end with an end instruction, as if the function body
+// were a block.
+static void AppendEndToFunction(
+ MachineFunction &MF,
+ const WebAssemblyInstrInfo &TII) {
+ BuildMI(MF.back(), MF.back().end(), DebugLoc(),
+ TII.get(WebAssembly::END_FUNCTION));
+}
+
/// Insert LOOP and BLOCK markers at appropriate places.
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
const WebAssemblyInstrInfo &TII,
@@ -555,6 +346,11 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
// Fix up block/loop signatures at the end of the function to conform to
// WebAssembly's rules.
FixEndsAtEndOfFunction(MF, MFI, BlockTops, LoopTops);
+
+ // Add an end instruction at the end of the function body.
+ if (!MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF())
+ AppendEndToFunction(MF, TII);
}
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
@@ -569,9 +365,6 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MF.getRegInfo().invalidateLiveness();
- // Sort the blocks, with contiguous loops.
- SortBlocks(MF, MLI, MDT);
-
// Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
PlaceMarkers(MF, MLI, TII, MDT, MFI);
diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index fc0a01ca30e5..bc6360aafd61 100644
--- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -97,15 +97,28 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
MI.setDesc(Desc);
// Rewrite argument order
- auto Uses = MI.explicit_uses();
- MachineInstr::mop_iterator it = Uses.begin();
- const MachineOperand MO = *it;
+ SmallVector<MachineOperand, 8> Ops;
+
+ // Set up a placeholder for the type signature immediate.
+ Ops.push_back(MachineOperand::CreateImm(0));
// Set up the flags immediate, which currently has no defined flags
// so it's always zero.
- it->ChangeToImmediate(0);
-
- MI.addOperand(MF, MO);
+ Ops.push_back(MachineOperand::CreateImm(0));
+
+ for (const MachineOperand &MO :
+ make_range(MI.operands_begin() +
+ MI.getDesc().getNumDefs() + 1,
+ MI.operands_begin() +
+ MI.getNumExplicitOperands()))
+ Ops.push_back(MO);
+ Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs()));
+
+ // Replace the instruction's operands.
+ while (MI.getNumOperands() > MI.getDesc().getNumDefs())
+ MI.RemoveOperand(MI.getNumOperands() - 1);
+ for (const MachineOperand &MO : Ops)
+ MI.addOperand(MO);
DEBUG(dbgs() << " After transform: " << MI);
Changed = true;
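For clarity, a standalone sketch of the operand reordering done just above: the pseudo instruction carries the callee first, but the real CALL_INDIRECT encoding wants a type-index immediate, a flags immediate, the call arguments, and finally the callee, so the callee ends up on top of the value stack. The operand names below are placeholders:

  // call_indirect_ops.cpp - standalone model of the operand shuffle done by
  // WebAssemblyCallIndirectFixup above.
  #include <cstdio>
  #include <string>
  #include <vector>

  int main() {
    // Explicit uses of PCALL_INDIRECT (after any defs): callee, then args.
    std::vector<std::string> PseudoUses = {"$callee", "$arg0", "$arg1"};

    std::vector<std::string> Ops;
    Ops.push_back("typeindex:0"); // placeholder type-signature immediate
    Ops.push_back("flags:0");     // flags immediate, currently always zero
    for (size_t I = 1; I < PseudoUses.size(); ++I)
      Ops.push_back(PseudoUses[I]); // arguments keep their relative order
    Ops.push_back(PseudoUses[0]);   // the callee moves to the end

    // Prints: typeindex:0 flags:0 $arg0 $arg1 $callee
    for (const std::string &Op : Ops)
      std::printf("%s ", Op.c_str());
    std::printf("\n");
  }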
diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 04ede7ff110c..41249117ae0e 100644
--- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -31,6 +31,14 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-explicit-locals"
+// A command-line option to disable this pass. Note that this produces output
+// which is not valid WebAssembly, though it may be more convenient for
+// writing LLVM unit tests.
+static cl::opt<bool> DisableWebAssemblyExplicitLocals(
+ "disable-wasm-explicit-locals", cl::ReallyHidden,
+ cl::desc("WebAssembly: Disable emission of get_local/set_local."),
+ cl::init(false));
+
namespace {
class WebAssemblyExplicitLocals final : public MachineFunctionPass {
StringRef getPassName() const override {
@@ -60,7 +68,25 @@ FunctionPass *llvm::createWebAssemblyExplicitLocals() {
/// if it doesn't yet have one.
static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local,
unsigned &CurLocal, unsigned Reg) {
- return Reg2Local.insert(std::make_pair(Reg, CurLocal++)).first->second;
+ auto P = Reg2Local.insert(std::make_pair(Reg, CurLocal));
+ if (P.second)
+ ++CurLocal;
+ return P.first->second;
+}
+
+/// Get the appropriate drop opcode for the given register class.
+static unsigned getDropOpcode(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return WebAssembly::DROP_I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return WebAssembly::DROP_I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return WebAssembly::DROP_F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return WebAssembly::DROP_F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return WebAssembly::DROP_V128;
+ llvm_unreachable("Unexpected register class");
}
/// Get the appropriate get_local opcode for the given register class.
@@ -146,6 +172,10 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
"********** Function: "
<< MF.getName() << '\n');
+ // Disable this pass if directed to do so.
+ if (DisableWebAssemblyExplicitLocals)
+ return false;
+
// Disable this pass if we aren't doing direct wasm object emission.
if (MF.getSubtarget<WebAssemblySubtarget>()
.getTargetTriple().isOSBinFormatELF())
@@ -176,6 +206,12 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Start assigning local numbers after the last parameter.
unsigned CurLocal = MFI.getParams().size();
+ // Precompute the set of registers that are unused, so that we can insert
+ // drops to their defs.
+ BitVector UseEmpty(MRI.getNumVirtRegs());
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i)
+ UseEmpty[i] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(i));
+
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
@@ -224,15 +260,26 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
assert(MI.getDesc().getNumDefs() <= 1);
if (MI.getDesc().getNumDefs() == 1) {
unsigned OldReg = MI.getOperand(0).getReg();
- if (!MFI.isVRegStackified(OldReg) && !MRI.use_empty(OldReg)) {
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ if (!MFI.isVRegStackified(OldReg)) {
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
auto InsertPt = std::next(MachineBasicBlock::iterator(&MI));
- unsigned Opc = getSetLocalOpcode(RC);
- BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
- .addImm(LocalId)
- .addReg(NewReg);
+ if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) {
+ MI.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+ if (UseEmpty[TargetRegisterInfo::virtReg2Index(OldReg)]) {
+ unsigned Opc = getDropOpcode(RC);
+ BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
+ .addReg(NewReg);
+ } else {
+ unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned Opc = getSetLocalOpcode(RC);
+ BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
+ .addImm(LocalId)
+ .addReg(NewReg);
+ }
MI.getOperand(0).setReg(NewReg);
MFI.stackifyVReg(NewReg);
Changed = true;
@@ -278,13 +325,16 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
}
// Define the locals.
+ // TODO: Sort the locals for better compression.
+ MFI.setNumLocals(CurLocal - MFI.getParams().size());
for (size_t i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
auto I = Reg2Local.find(Reg);
if (I == Reg2Local.end() || I->second < MFI.getParams().size())
continue;
- MFI.addLocal(typeForRegClass(MRI.getRegClass(Reg)));
+ MFI.setLocal(I->second - MFI.getParams().size(),
+ typeForRegClass(MRI.getRegClass(Reg)));
Changed = true;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index bc7020fded8c..53698ff09b10 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -116,6 +116,8 @@ private:
case MVT::f32:
case MVT::f64:
return VT;
+ case MVT::f16:
+ return MVT::f32;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -594,12 +596,12 @@ bool WebAssemblyFastISel::fastLowerArguments() {
unsigned i = 0;
for (auto const &Arg : F->args()) {
- const AttributeSet &Attrs = F->getAttributes();
- if (Attrs.hasAttribute(i+1, Attribute::ByVal) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftSelf) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftError) ||
- Attrs.hasAttribute(i+1, Attribute::InAlloca) ||
- Attrs.hasAttribute(i+1, Attribute::Nest))
+ const AttributeList &Attrs = F->getAttributes();
+ if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
+ Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
+ Attrs.hasParamAttribute(i, Attribute::Nest))
return false;
Type *ArgTy = Arg.getType();
@@ -744,19 +746,19 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
- const AttributeSet &Attrs = Call->getAttributes();
- if (Attrs.hasAttribute(i+1, Attribute::ByVal) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftSelf) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftError) ||
- Attrs.hasAttribute(i+1, Attribute::InAlloca) ||
- Attrs.hasAttribute(i+1, Attribute::Nest))
+ const AttributeList &Attrs = Call->getAttributes();
+ if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
+ Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
+ Attrs.hasParamAttribute(i, Attribute::Nest))
return false;
unsigned Reg;
- if (Attrs.hasAttribute(i+1, Attribute::SExt))
+ if (Attrs.hasParamAttribute(i, Attribute::SExt))
Reg = getRegForSignedValue(V);
- else if (Attrs.hasAttribute(i+1, Attribute::ZExt))
+ else if (Attrs.hasParamAttribute(i, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index adf904ee0269..76a2ff3f9803 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -84,7 +84,7 @@ static void FindUses(Value *V, Function &F,
// - Call with fewer arguments than needed: arguments are filled in with undef
// - Return value is not needed: drop it
// - Return value needed but not present: supply an undef
-//
+//
// For now, return nullptr without creating a wrapper if the wrapper cannot
// be generated due to incompatible types.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
@@ -148,6 +148,11 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!Ty)
continue;
+ // Wasm varargs are not ABI-compatible with non-varargs. Just ignore
+ // such casts for now.
+ if (Ty->isVarArg() || F->isVarArg())
+ continue;
+
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
Pair.first->second = CreateWrapper(F, Ty);
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index a6a2c0bf06ae..4209bc333f23 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -24,10 +24,11 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -101,25 +102,35 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
MachineBasicBlock::iterator &InsertAddr,
MachineBasicBlock::iterator &InsertStore,
const DebugLoc &DL) {
- const char *ES = "__stack_pointer";
- auto *SPSymbol = MF.createExternalSymbolName(ES);
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterClass *PtrRC =
- MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
- unsigned Zero = MRI.createVirtualRegister(PtrRC);
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero)
- .addImm(0);
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
- MachineMemOperand::MOStore, 4, 4);
- BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32))
- .addImm(2) // p2align
- .addExternalSymbol(SPSymbol)
- .addReg(Zero)
- .addReg(SrcReg)
- .addMemOperand(MMO);
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ const char *ES = "__stack_pointer";
+ auto *SPSymbol = MF.createExternalSymbolName(ES);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *PtrRC =
+ MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ unsigned Zero = MRI.createVirtualRegister(PtrRC);
+
+ BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero)
+ .addImm(0);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
+ MachineMemOperand::MOStore, 4, 4);
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32))
+ .addImm(2) // p2align
+ .addExternalSymbol(SPSymbol)
+ .addReg(Zero)
+ .addReg(SrcReg)
+ .addMemOperand(MMO);
+ } else {
+ MachineModuleInfoWasm &MMIW =
+ MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32))
+ .addImm(MMIW.getStackPointerGlobal())
+ .addReg(SrcReg);
+ }
}
MachineBasicBlock::iterator
@@ -151,27 +162,50 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.begin();
+ while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt))
+ ++InsertPt;
DebugLoc DL;
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
- unsigned Zero = MRI.createVirtualRegister(PtrRC);
unsigned SPReg = WebAssembly::SP32;
if (StackSize)
SPReg = MRI.createVirtualRegister(PtrRC);
- const char *ES = "__stack_pointer";
- auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
- .addImm(0);
- MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
- MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
- MachineMemOperand::MOLoad, 4, 4);
- // Load the SP value.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
- .addImm(2) // p2align
- .addExternalSymbol(SPSymbol)
- .addReg(Zero) // addr
- .addMemOperand(LoadMMO);
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ const char *ES = "__stack_pointer";
+ auto *SPSymbol = MF.createExternalSymbolName(ES);
+ unsigned Zero = MRI.createVirtualRegister(PtrRC);
+
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
+ .addImm(0);
+ MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
+ MachineMemOperand::MOLoad, 4, 4);
+ // Load the SP value.
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
+ .addImm(2) // p2align
+ .addExternalSymbol(SPSymbol)
+ .addReg(Zero) // addr
+ .addMemOperand(LoadMMO);
+ } else {
+ auto &MMIW = MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
+ if (!MMIW.hasStackPointerGlobal()) {
+ MMIW.setStackPointerGlobal(MMIW.getGlobals().size());
+
+ // Create the stack-pointer global. For now, just use the
+ // Emscripten/Binaryen ABI names.
+ wasm::Global G;
+ G.Type = wasm::ValType::I32;
+ G.Mutable = true;
+ G.InitialValue = 0;
+ G.InitialModule = "env";
+ G.InitialName = "STACKTOP";
+ MMIW.addGlobal(G);
+ }
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg)
+ .addImm(MMIW.getStackPointerGlobal());
+ }
bool HasBP = hasBP(MF);
if (HasBP) {
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6a7f75a6b3a1..31a5ca1f4cc2 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -95,6 +95,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Support minnan and maxnan, which otherwise default to expand.
setOperationAction(ISD::FMINNAN, T, Legal);
setOperationAction(ISD::FMAXNAN, T, Legal);
+ // WebAssembly currently has no builtin f16 support.
+ setOperationAction(ISD::FP16_TO_FP, T, Expand);
+ setOperationAction(ISD::FP_TO_FP16, T, Expand);
+ setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
+ setTruncStoreAction(T, MVT::f16, Expand);
}
for (auto T : {MVT::i32, MVT::i64}) {
@@ -253,7 +258,8 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
return true;
}
-bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
+ AttributeList Attr) const {
// The current thinking is that wasm engines will perform this optimization,
// so we can save on code size.
return true;
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 5bc723028e63..99d3d0d558f5 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -58,7 +58,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
unsigned AS) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
bool *Fast) const override;
- bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 047f4be066c0..73d1d4be293b 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -30,13 +30,15 @@ multiclass CALL<WebAssemblyRegClass vt, string prefix> {
[(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
!strconcat(prefix, "call\t$dst, $callee"),
0x10>;
+
let isCodeGenOnly = 1 in {
def PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
[(set vt:$dst, (WebAssemblycall1 I32:$callee))],
"PSEUDO CALL INDIRECT\t$callee">;
} // isCodeGenOnly = 1
- def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins i32imm:$flags, variable_ops),
+ def CALL_INDIRECT_#vt : I<(outs vt:$dst),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
[],
!strconcat(prefix, "call_indirect\t$dst"),
0x11>;
@@ -48,6 +50,7 @@ multiclass SIMD_CALL<ValueType vt, string prefix> {
(WebAssemblycall1 (i32 imm:$callee)))],
!strconcat(prefix, "call\t$dst, $callee"),
0x10>;
+
let isCodeGenOnly = 1 in {
def PCALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
(ins I32:$callee, variable_ops),
@@ -57,7 +60,8 @@ multiclass SIMD_CALL<ValueType vt, string prefix> {
} // isCodeGenOnly = 1
def CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins i32imm:$flags, variable_ops),
+ (ins TypeIndex:$type, i32imm:$flags,
+ variable_ops),
[],
!strconcat(prefix, "call_indirect\t$dst"),
0x11>;
@@ -76,13 +80,15 @@ let Uses = [SP32, SP64], isCall = 1 in {
def CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
[(WebAssemblycall0 (i32 imm:$callee))],
"call \t$callee", 0x10>;
+
let isCodeGenOnly = 1 in {
def PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
[(WebAssemblycall0 I32:$callee)],
"PSEUDO CALL INDIRECT\t$callee">;
} // isCodeGenOnly = 1
- def CALL_INDIRECT_VOID : I<(outs), (ins i32imm:$flags, variable_ops),
+ def CALL_INDIRECT_VOID : I<(outs),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
[],
"call_indirect\t", 0x11>;
} // Uses = [SP32,SP64], isCall = 1
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 1146431e6b77..39cb1ca336f2 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -64,9 +64,12 @@ let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
-// END_BLOCK and END_LOOP are represented with the same opcode in wasm.
+// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode
+// in wasm.
def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>;
def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>;
+let isTerminator = 1, isBarrier = 1 in
+def END_FUNCTION : I<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
multiclass RETURN<WebAssemblyRegClass vt> {
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 030be0862a56..03c9c1f8d5c0 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -55,8 +55,8 @@ defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>;
defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>;
} // isCommutable = 1
defm LT : ComparisonFP<SETOLT, "lt ", 0x5d, 0x63>;
-defm LE : ComparisonFP<SETOLE, "le ", 0x5e, 0x64>;
-defm GT : ComparisonFP<SETOGT, "gt ", 0x5f, 0x65>;
+defm LE : ComparisonFP<SETOLE, "le ", 0x5f, 0x65>;
+defm GT : ComparisonFP<SETOGT, "gt ", 0x5e, 0x64>;
defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>;
} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 0e2d8bbaf64c..8846952e5af4 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -183,11 +183,9 @@ unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB,
assert(Cond.size() == 2 && "Expected a flag and a successor block");
if (Cond[0].getImm()) {
- BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
} else {
- BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS))
- .addMBB(TBB)
- .addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)).addMBB(TBB).add(Cond[1]);
}
if (!FBB)
return 1;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index dcfd1a42c6aa..a601b575f579 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -74,6 +74,9 @@ def bb_op : Operand<OtherVT>;
let OperandType = "OPERAND_LOCAL" in
def local_op : Operand<i32>;
+let OperandType = "OPERAND_GLOBAL" in
+def global_op : Operand<i32>;
+
let OperandType = "OPERAND_I32IMM" in
def i32imm_op : Operand<i32>;
@@ -104,6 +107,9 @@ def Signature : Operand<i32> {
}
} // OperandType = "OPERAND_SIGNATURE"
+let OperandType = "OPERAND_TYPEINDEX" in
+def TypeIndex : Operand<i32>;
+
} // OperandNamespace = "WebAssembly"
//===----------------------------------------------------------------------===//
@@ -178,6 +184,18 @@ let hasSideEffects = 0 in {
def TEE_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src), [],
"tee_local\t$res, $local, $src", 0x22>;
+ // Unused values must be dropped in some contexts.
+ def DROP_#vt : I<(outs), (ins vt:$src), [],
+ "drop\t$src", 0x1a>;
+
+ let mayLoad = 1 in
+ def GET_GLOBAL_#vt : I<(outs vt:$res), (ins global_op:$local), [],
+ "get_global\t$res, $local", 0x23>;
+
+ let mayStore = 1 in
+ def SET_GLOBAL_#vt : I<(outs), (ins global_op:$local, vt:$src), [],
+ "set_global\t$local, $src", 0x24>;
+
} // hasSideEffects = 0
}
defm : LOCAL<I32>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index b606ebb0a68d..25d77bb1f234 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -673,9 +673,9 @@ def CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
Requires<[HasAddr32]>;
// Grow memory.
-def GROW_MEMORY_I32 : I<(outs), (ins i32imm:$flags, I32:$delta),
+def GROW_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
[],
- "grow_memory\t$delta", 0x40>,
+ "grow_memory\t$dst, $delta", 0x40>,
Requires<[HasAddr32]>;
} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 7ea5d05a1b21..744a3ed427af 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -118,7 +118,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
// delete the br_unless.
assert(Inverted);
BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
- .addOperand(MI->getOperand(0))
+ .add(MI->getOperand(0))
.addReg(Cond);
MBB.erase(MI);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 72cb1ccbe668..947c0329bb6e 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -412,7 +412,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
if (CI->doesNotReturn()) {
if (auto *F = dyn_cast<Function>(CI->getCalledValue()))
F->removeFnAttr(Attribute::NoReturn);
- CI->removeAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn);
+ CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
}
IRBuilder<> IRB(C);
@@ -435,25 +435,20 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
// Because we added the pointer to the callee as first argument, all
// argument attribute indices have to be incremented by one.
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &InvokePAL = CI->getAttributes();
- CallSite::arg_iterator AI = CI->arg_begin();
- unsigned i = 1; // Argument attribute index starts from 1
- for (unsigned e = CI->getNumArgOperands(); i <= e; ++AI, ++i) {
- if (InvokePAL.hasAttributes(i)) {
- AttrBuilder B(InvokePAL, i);
- AttributesVec.push_back(AttributeSet::get(C, i + 1, B));
- }
- }
- // Add any return attributes.
- if (InvokePAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getRetAttributes()));
- // Add any function attributes.
- if (InvokePAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getFnAttributes()));
+ SmallVector<AttributeSet, 8> ArgAttributes;
+ const AttributeList &InvokeAL = CI->getAttributes();
+
+ // No attributes for the callee pointer.
+ ArgAttributes.push_back(AttributeSet());
+ // Copy the argument attributes from the original
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i < e; ++i)
+ ArgAttributes.push_back(InvokeAL.getParamAttributes(i));
+
// Reconstruct the AttributeList based on the vector we constructed.
- AttributeSet NewCallPAL = AttributeSet::get(C, AttributesVec);
- NewCall->setAttributes(NewCallPAL);
+ AttributeList NewCallAL =
+ AttributeList::get(C, InvokeAL.getFnAttributes(),
+ InvokeAL.getRetAttributes(), ArgAttributes);
+ NewCall->setAttributes(NewCallAL);
CI->replaceAllUsesWith(NewCall);
@@ -624,7 +619,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::createSetThrewFunction(Module &M) {
Function *F =
Function::Create(FTy, GlobalValue::ExternalLinkage, SetThrewFName, &M);
Argument *Arg1 = &*(F->arg_begin());
- Argument *Arg2 = &*(++F->arg_begin());
+ Argument *Arg2 = &*std::next(F->arg_begin());
Arg1->setName("threw");
Arg2->setName("value");
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 022a448590ec..ff186eb91503 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -14,7 +14,10 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyMCInstLower.h"
+#include "WebAssemblyAsmPrinter.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyRuntimeLibcallSignatures.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
@@ -22,18 +25,85 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
- return Printer.getSymbol(MO.getGlobal());
+ const GlobalValue *Global = MO.getGlobal();
+ MCSymbol *Sym = Printer.getSymbol(Global);
+ if (isa<MCSymbolELF>(Sym))
+ return Sym;
+
+ MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+
+ if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) {
+ const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ const Function &CurrentFunc = *MF.getFunction();
+
+ SmallVector<wasm::ValType, 4> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+
+ wasm::ValType iPTR =
+ MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() ?
+ wasm::ValType::I64 :
+ wasm::ValType::I32;
+
+ SmallVector<MVT, 4> ResultMVTs;
+ ComputeLegalValueVTs(CurrentFunc, TM, FuncTy->getReturnType(), ResultMVTs);
+ // WebAssembly can't currently handle returning tuples.
+ if (ResultMVTs.size() <= 1)
+ for (MVT ResultMVT : ResultMVTs)
+ Returns.push_back(WebAssembly::toValType(ResultMVT));
+ else
+ Params.push_back(iPTR);
+
+ for (Type *Ty : FuncTy->params()) {
+ SmallVector<MVT, 4> ParamMVTs;
+ ComputeLegalValueVTs(CurrentFunc, TM, Ty, ParamMVTs);
+ for (MVT ParamMVT : ParamMVTs)
+ Params.push_back(WebAssembly::toValType(ParamMVT));
+ }
+
+ if (FuncTy->isVarArg())
+ Params.push_back(iPTR);
+
+ WasmSym->setReturns(std::move(Returns));
+ WasmSym->setParams(std::move(Params));
+ WasmSym->setIsFunction(true);
+ }
+
+ return WasmSym;
}
MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
const MachineOperand &MO) const {
- return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+ const char *Name = MO.getSymbolName();
+ MCSymbol *Sym = Printer.GetExternalSymbolSymbol(Name);
+ if (isa<MCSymbolELF>(Sym))
+ return Sym;
+
+ MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+ const WebAssemblySubtarget &Subtarget = Printer.getSubtarget();
+
+ // __stack_pointer is a global variable; all other external symbols used by
+ // CodeGen are functions.
+ if (strcmp(Name, "__stack_pointer") == 0)
+ return WasmSym;
+
+ SmallVector<wasm::ValType, 4> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+ GetSignature(Subtarget, Name, Returns, Params);
+
+ WasmSym->setReturns(std::move(Returns));
+ WasmSym->setParams(std::move(Params));
+ WasmSym->setIsFunction(true);
+
+ return WasmSym;
}
MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
@@ -42,6 +112,9 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
MCSymbolRefExpr::VariantKind VK =
IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
: MCSymbolRefExpr::VK_None;
+ if (!isa<MCSymbolELF>(Sym))
+ cast<MCSymbolWasm>(Sym)->setIsFunction(IsFunc);
+
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
if (Offset != 0) {
@@ -54,20 +127,34 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
return MCOperand::createExpr(Expr);
}
+// Return the WebAssembly type associated with the given register class.
+static wasm::ValType getType(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return wasm::ValType::I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return wasm::ValType::I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return wasm::ValType::F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return wasm::ValType::F64;
+ llvm_unreachable("Unexpected register class");
+}
+
void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
+ const MCInstrDesc &Desc = MI->getDesc();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_MachineBasicBlock:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("MachineBasicBlock operand should have been rewritten");
case MachineOperand::MO_Register: {
// Ignore all implicit register operands.
@@ -80,6 +167,41 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
break;
}
case MachineOperand::MO_Immediate:
+ if (i < Desc.NumOperands) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
+ MCSymbol *Sym = Printer.createTempSymbol("typeindex");
+ if (!isa<MCSymbolELF>(Sym)) {
+ SmallVector<wasm::ValType, 4> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+
+ const MachineRegisterInfo &MRI =
+ MI->getParent()->getParent()->getRegInfo();
+ for (const MachineOperand &MO : MI->defs())
+ Returns.push_back(getType(MRI.getRegClass(MO.getReg())));
+ for (const MachineOperand &MO : MI->explicit_uses())
+ if (MO.isReg())
+ Params.push_back(getType(MRI.getRegClass(MO.getReg())));
+
+ // call_indirect instructions have a callee operand at the end which
+ // doesn't count as a param.
+ if (WebAssembly::isCallIndirect(*MI))
+ Params.pop_back();
+
+ MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+ WasmSym->setReturns(std::move(Returns));
+ WasmSym->setParams(std::move(Params));
+ WasmSym->setIsFunction(true);
+
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(WasmSym,
+ MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX,
+ Ctx);
+ MCOp = MCOperand::createExpr(Expr);
+ break;
+ }
+ }
+ }
MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_FPImmediate: {
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index ab4ba1c28d53..d1d2794c3b8f 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -20,7 +20,7 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
-class AsmPrinter;
+class WebAssemblyAsmPrinter;
class MCContext;
class MCSymbol;
class MachineInstr;
@@ -29,7 +29,7 @@ class MachineOperand;
/// This class is used to lower a MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCContext &Ctx;
- AsmPrinter &Printer;
+ WebAssemblyAsmPrinter &Printer;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
@@ -37,7 +37,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
bool IsFunc) const;
public:
- WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer)
+ WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
: Ctx(ctx), Printer(printer) {}
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
};
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 756619bebbed..1fcbb7791d4e 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -60,6 +60,8 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
void addResult(MVT VT) { Results.push_back(VT); }
const std::vector<MVT> &getResults() const { return Results; }
+ void setNumLocals(size_t NumLocals) { Locals.resize(NumLocals, MVT::i32); }
+ void setLocal(size_t i, MVT VT) { Locals[i] = VT; }
void addLocal(MVT VT) { Locals.push_back(VT); }
const std::vector<MVT> &getLocals() const { return Locals; }
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 96520aa5d28c..f4c9a4ef6b9c 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
void OptimizeReturned::visitCallSite(CallSite CS) {
for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
- if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
+ if (CS.paramHasAttr(0, Attribute::Returned)) {
Instruction *Inst = CS.getInstruction();
Value *Arg = CS.getArgOperand(i);
// Ignore constants, globals, undef, etc.
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index 32dde88c2234..d2fbc5a22308 100644
--- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -80,19 +80,31 @@ static bool MaybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
return false;
if (&MBB != &MF.back())
return false;
- if (&MI != &MBB.back())
- return false;
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ if (&MI != &MBB.back())
+ return false;
+ } else {
+ MachineBasicBlock::iterator End = MBB.end();
+ --End;
+ assert(End->getOpcode() == WebAssembly::END_FUNCTION);
+ --End;
+ if (&MI != &*End)
+ return false;
+ }
- // If the operand isn't stackified, insert a COPY to read the operand and
- // stackify it.
- MachineOperand &MO = MI.getOperand(0);
- unsigned Reg = MO.getReg();
- if (!MFI.isVRegStackified(Reg)) {
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
- BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg)
- .addReg(Reg);
- MO.setReg(NewReg);
- MFI.stackifyVReg(NewReg);
+ if (FallthroughOpc != WebAssembly::FALLTHROUGH_RETURN_VOID) {
+ // If the operand isn't stackified, insert a COPY to read the operand and
+ // stackify it.
+ MachineOperand &MO = MI.getOperand(0);
+ unsigned Reg = MO.getReg();
+ if (!MFI.isVRegStackified(Reg)) {
+ unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg)
+ .addReg(Reg);
+ MO.setReg(NewReg);
+ MFI.stackifyVReg(NewReg);
+ }
}
// Rewrite the return.
@@ -127,7 +139,7 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
Name == TLI.getLibcallName(RTLIB::MEMSET)) {
- LibFunc::Func Func;
+ LibFunc Func;
if (LibInfo.getLibFunc(Name, Func)) {
const auto &Op2 = MI.getOperand(2);
if (!Op2.isReg())
@@ -188,9 +200,9 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_VOID:
- if (!DisableWebAssemblyFallthroughReturnOpt &&
- &MBB == &MF.back() && &MI == &MBB.back())
- MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN_VOID));
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
+ WebAssembly::INSTRUCTION_LIST_END);
break;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 32ee09e45796..57d454746b06 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
@@ -152,7 +153,7 @@ static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
}
// Determine whether MI reads memory, writes memory, has side effects,
-// and/or uses the __stack_pointer value.
+// and/or uses the stack pointer value.
static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
bool &Write, bool &Effects, bool &StackPointer) {
assert(!MI.isPosition());
@@ -169,15 +170,28 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
if (MI.mayStore()) {
Write = true;
- // Check for stores to __stack_pointer.
- for (auto MMO : MI.memoperands()) {
- const MachinePointerInfo &MPI = MMO->getPointerInfo();
- if (MPI.V.is<const PseudoSourceValue *>()) {
- auto PSV = MPI.V.get<const PseudoSourceValue *>();
- if (const ExternalSymbolPseudoSourceValue *EPSV =
- dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
- if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
- StackPointer = true;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ // Check for stores to __stack_pointer.
+ for (auto MMO : MI.memoperands()) {
+ const MachinePointerInfo &MPI = MMO->getPointerInfo();
+ if (MPI.V.is<const PseudoSourceValue *>()) {
+ auto PSV = MPI.V.get<const PseudoSourceValue *>();
+ if (const ExternalSymbolPseudoSourceValue *EPSV =
+ dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
+ if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
+ StackPointer = true;
+ }
+ }
+ } else {
+ // Check for sets of the stack pointer.
+ const MachineModuleInfoWasm &MMIW =
+ MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
+ if ((MI.getOpcode() == WebAssembly::SET_LOCAL_I32 ||
+ MI.getOpcode() == WebAssembly::SET_LOCAL_I64) &&
+ MI.getOperand(0).getImm() == MMIW.getStackPointerGlobal()) {
+ StackPointer = true;
}
}
} else if (MI.hasOrderedMemoryRef()) {
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
new file mode 100644
index 000000000000..c02ef4a1c399
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -0,0 +1,1302 @@
+// CodeGen/RuntimeLibcallSignatures.cpp - R.T. Lib. Call Signatures -*- C++ -*--
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains signature information for runtime libcalls.
+///
+/// CodeGen uses external symbols, which it refers to by name. The WebAssembly
+/// target needs type information for all functions. This file contains a big
+/// table providing type signatures for all runtime library functions that LLVM
+/// uses.
+///
+/// This is currently a fairly heavy-handed solution.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyRuntimeLibcallSignatures.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+
+using namespace llvm;
+
+namespace {
+
+enum RuntimeLibcallSignature {
+ func,
+ f32_func_f32,
+ f32_func_f64,
+ f32_func_i32,
+ f32_func_i64,
+ f32_func_i16,
+ f64_func_f32,
+ f64_func_f64,
+ f64_func_i32,
+ f64_func_i64,
+ i32_func_f32,
+ i32_func_f64,
+ i32_func_i32,
+ i64_func_f32,
+ i64_func_f64,
+ i64_func_i64,
+ f32_func_f32_f32,
+ f32_func_f32_i32,
+ f32_func_i64_i64,
+ f64_func_f64_f64,
+ f64_func_f64_i32,
+ f64_func_i64_i64,
+ i16_func_f32,
+ i8_func_i8_i8,
+ func_f32_iPTR_iPTR,
+ func_f64_iPTR_iPTR,
+ i16_func_i16_i16,
+ i32_func_f32_f32,
+ i32_func_f64_f64,
+ i32_func_i32_i32,
+ i64_func_i64_i64,
+ i64_i64_func_f32,
+ i64_i64_func_f64,
+ i16_i16_func_i16_i16,
+ i32_i32_func_i32_i32,
+ i64_i64_func_i64_i64,
+ i64_i64_func_i64_i64_i64_i64,
+ i64_i64_i64_i64_func_i64_i64_i64_i64,
+ i64_i64_func_i64_i64_i32,
+ iPTR_func_iPTR_i32_iPTR,
+ iPTR_func_iPTR_iPTR_iPTR,
+ f32_func_f32_f32_f32,
+ f64_func_f64_f64_f64,
+ func_i64_i64_iPTR_iPTR,
+ func_iPTR_f32,
+ func_iPTR_f64,
+ func_iPTR_i32,
+ func_iPTR_i64,
+ func_iPTR_i64_i64,
+ func_iPTR_i64_i64_i64_i64,
+ func_iPTR_i64_i64_i64_i64_i64_i64,
+ i32_func_i64_i64,
+ i32_func_i64_i64_i64_i64,
+ unsupported
+};
+
+} // end anonymous namespace
+
+static const RuntimeLibcallSignature
+RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = {
+// Integer
+/* SHL_I16 */ i16_func_i16_i16,
+/* SHL_I32 */ i32_func_i32_i32,
+/* SHL_I64 */ i64_func_i64_i64,
+/* SHL_I128 */ i64_i64_func_i64_i64_i32,
+/* SRL_I16 */ i16_func_i16_i16,
+/* SRL_I32 */ i32_func_i32_i32,
+/* SRL_I64 */ i64_func_i64_i64,
+/* SRL_I128 */ i64_i64_func_i64_i64_i32,
+/* SRA_I16 */ i16_func_i16_i16,
+/* SRA_I32 */ i32_func_i32_i32,
+/* SRA_I64 */ i64_func_i64_i64,
+/* SRA_I128 */ i64_i64_func_i64_i64_i32,
+/* MUL_I8 */ i8_func_i8_i8,
+/* MUL_I16 */ i16_func_i16_i16,
+/* MUL_I32 */ i32_func_i32_i32,
+/* MUL_I64 */ i64_func_i64_i64,
+/* MUL_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* MULO_I32 */ i32_func_i32_i32,
+/* MULO_I64 */ i64_func_i64_i64,
+/* MULO_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* SDIV_I8 */ i8_func_i8_i8,
+/* SDIV_I16 */ i16_func_i16_i16,
+/* SDIV_I32 */ i32_func_i32_i32,
+/* SDIV_I64 */ i64_func_i64_i64,
+/* SDIV_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* UDIV_I8 */ i8_func_i8_i8,
+/* UDIV_I16 */ i16_func_i16_i16,
+/* UDIV_I32 */ i32_func_i32_i32,
+/* UDIV_I64 */ i64_func_i64_i64,
+/* UDIV_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* SREM_I8 */ i8_func_i8_i8,
+/* SREM_I16 */ i16_func_i16_i16,
+/* SREM_I32 */ i32_func_i32_i32,
+/* SREM_I64 */ i64_func_i64_i64,
+/* SREM_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* UREM_I8 */ i8_func_i8_i8,
+/* UREM_I16 */ i16_func_i16_i16,
+/* UREM_I32 */ i32_func_i32_i32,
+/* UREM_I64 */ i64_func_i64_i64,
+/* UREM_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* SDIVREM_I8 */ i8_func_i8_i8,
+/* SDIVREM_I16 */ i16_i16_func_i16_i16,
+/* SDIVREM_I32 */ i32_i32_func_i32_i32,
+/* SDIVREM_I64 */ i64_func_i64_i64,
+/* SDIVREM_I128 */ i64_i64_i64_i64_func_i64_i64_i64_i64,
+/* UDIVREM_I8 */ i8_func_i8_i8,
+/* UDIVREM_I16 */ i16_i16_func_i16_i16,
+/* UDIVREM_I32 */ i32_i32_func_i32_i32,
+/* UDIVREM_I64 */ i64_i64_func_i64_i64,
+/* UDIVREM_I128 */ i64_i64_i64_i64_func_i64_i64_i64_i64,
+/* NEG_I32 */ i32_func_i32,
+/* NEG_I64 */ i64_func_i64,
+
+// FLOATING POINT
+/* ADD_F32 */ f32_func_f32_f32,
+/* ADD_F64 */ f64_func_f64_f64,
+/* ADD_F80 */ unsupported,
+/* ADD_F128 */ func_iPTR_i64_i64_i64_i64,
+/* ADD_PPCF128 */ unsupported,
+/* SUB_F32 */ f32_func_f32_f32,
+/* SUB_F64 */ f64_func_f64_f64,
+/* SUB_F80 */ unsupported,
+/* SUB_F128 */ func_iPTR_i64_i64_i64_i64,
+/* SUB_PPCF128 */ unsupported,
+/* MUL_F32 */ f32_func_f32_f32,
+/* MUL_F64 */ f64_func_f64_f64,
+/* MUL_F80 */ unsupported,
+/* MUL_F128 */ func_iPTR_i64_i64_i64_i64,
+/* MUL_PPCF128 */ unsupported,
+/* DIV_F32 */ f32_func_f32_f32,
+/* DIV_F64 */ f64_func_f64_f64,
+/* DIV_F80 */ unsupported,
+/* DIV_F128 */ func_iPTR_i64_i64_i64_i64,
+/* DIV_PPCF128 */ unsupported,
+/* REM_F32 */ f32_func_f32_f32,
+/* REM_F64 */ f64_func_f64_f64,
+/* REM_F80 */ unsupported,
+/* REM_F128 */ func_iPTR_i64_i64_i64_i64,
+/* REM_PPCF128 */ unsupported,
+/* FMA_F32 */ f32_func_f32_f32_f32,
+/* FMA_F64 */ f64_func_f64_f64_f64,
+/* FMA_F80 */ unsupported,
+/* FMA_F128 */ func_iPTR_i64_i64_i64_i64_i64_i64,
+/* FMA_PPCF128 */ unsupported,
+/* POWI_F32 */ f32_func_f32_i32,
+/* POWI_F64 */ f64_func_f64_i32,
+/* POWI_F80 */ unsupported,
+/* POWI_F128 */ func_iPTR_i64_i64_i64_i64,
+/* POWI_PPCF128 */ unsupported,
+/* SQRT_F32 */ f32_func_f32,
+/* SQRT_F64 */ f64_func_f64,
+/* SQRT_F80 */ unsupported,
+/* SQRT_F128 */ func_iPTR_i64_i64,
+/* SQRT_PPCF128 */ unsupported,
+/* LOG_F32 */ f32_func_f32,
+/* LOG_F64 */ f64_func_f64,
+/* LOG_F80 */ unsupported,
+/* LOG_F128 */ func_iPTR_i64_i64,
+/* LOG_PPCF128 */ unsupported,
+/* LOG2_F32 */ f32_func_f32,
+/* LOG2_F64 */ f64_func_f64,
+/* LOG2_F80 */ unsupported,
+/* LOG2_F128 */ func_iPTR_i64_i64,
+/* LOG2_PPCF128 */ unsupported,
+/* LOG10_F32 */ f32_func_f32,
+/* LOG10_F64 */ f64_func_f64,
+/* LOG10_F80 */ unsupported,
+/* LOG10_F128 */ func_iPTR_i64_i64,
+/* LOG10_PPCF128 */ unsupported,
+/* EXP_F32 */ f32_func_f32,
+/* EXP_F64 */ f64_func_f64,
+/* EXP_F80 */ unsupported,
+/* EXP_F128 */ func_iPTR_i64_i64,
+/* EXP_PPCF128 */ unsupported,
+/* EXP2_F32 */ f32_func_f32,
+/* EXP2_F64 */ f64_func_f64,
+/* EXP2_F80 */ unsupported,
+/* EXP2_F128 */ func_iPTR_i64_i64,
+/* EXP2_PPCF128 */ unsupported,
+/* SIN_F32 */ f32_func_f32,
+/* SIN_F64 */ f64_func_f64,
+/* SIN_F80 */ unsupported,
+/* SIN_F128 */ func_iPTR_i64_i64,
+/* SIN_PPCF128 */ unsupported,
+/* COS_F32 */ f32_func_f32,
+/* COS_F64 */ f64_func_f64,
+/* COS_F80 */ unsupported,
+/* COS_F128 */ func_iPTR_i64_i64,
+/* COS_PPCF128 */ unsupported,
+/* SINCOS_F32 */ func_f32_iPTR_iPTR,
+/* SINCOS_F64 */ func_f64_iPTR_iPTR,
+/* SINCOS_F80 */ unsupported,
+/* SINCOS_F128 */ func_i64_i64_iPTR_iPTR,
+/* SINCOS_PPCF128 */ unsupported,
+/* POW_F32 */ f32_func_f32_f32,
+/* POW_F64 */ f64_func_f64_f64,
+/* POW_F80 */ unsupported,
+/* POW_F128 */ func_iPTR_i64_i64_i64_i64,
+/* POW_PPCF128 */ unsupported,
+/* CEIL_F32 */ f32_func_f32,
+/* CEIL_F64 */ f64_func_f64,
+/* CEIL_F80 */ unsupported,
+/* CEIL_F128 */ func_iPTR_i64_i64,
+/* CEIL_PPCF128 */ unsupported,
+/* TRUNC_F32 */ f32_func_f32,
+/* TRUNC_F64 */ f64_func_f64,
+/* TRUNC_F80 */ unsupported,
+/* TRUNC_F128 */ func_iPTR_i64_i64,
+/* TRUNC_PPCF128 */ unsupported,
+/* RINT_F32 */ f32_func_f32,
+/* RINT_F64 */ f64_func_f64,
+/* RINT_F80 */ unsupported,
+/* RINT_F128 */ func_iPTR_i64_i64,
+/* RINT_PPCF128 */ unsupported,
+/* NEARBYINT_F32 */ f32_func_f32,
+/* NEARBYINT_F64 */ f64_func_f64,
+/* NEARBYINT_F80 */ unsupported,
+/* NEARBYINT_F128 */ func_iPTR_i64_i64,
+/* NEARBYINT_PPCF128 */ unsupported,
+/* ROUND_F32 */ f32_func_f32,
+/* ROUND_F64 */ f64_func_f64,
+/* ROUND_F80 */ unsupported,
+/* ROUND_F128 */ func_iPTR_i64_i64,
+/* ROUND_PPCF128 */ unsupported,
+/* FLOOR_F32 */ f32_func_f32,
+/* FLOOR_F64 */ f64_func_f64,
+/* FLOOR_F80 */ unsupported,
+/* FLOOR_F128 */ func_iPTR_i64_i64,
+/* FLOOR_PPCF128 */ unsupported,
+/* COPYSIGN_F32 */ f32_func_f32_f32,
+/* COPYSIGN_F64 */ f64_func_f64_f64,
+/* COPYSIGN_F80 */ unsupported,
+/* COPYSIGN_F128 */ func_iPTR_i64_i64_i64_i64,
+/* COPYSIGN_PPCF128 */ unsupported,
+/* FMIN_F32 */ f32_func_f32_f32,
+/* FMIN_F64 */ f64_func_f64_f64,
+/* FMIN_F80 */ unsupported,
+/* FMIN_F128 */ func_iPTR_i64_i64_i64_i64,
+/* FMIN_PPCF128 */ unsupported,
+/* FMAX_F32 */ f32_func_f32_f32,
+/* FMAX_F64 */ f64_func_f64_f64,
+/* FMAX_F80 */ unsupported,
+/* FMAX_F128 */ func_iPTR_i64_i64_i64_i64,
+/* FMAX_PPCF128 */ unsupported,
+
+// CONVERSION
+/* FPEXT_F32_PPCF128 */ unsupported,
+/* FPEXT_F64_PPCF128 */ unsupported,
+/* FPEXT_F64_F128 */ func_iPTR_f64,
+/* FPEXT_F32_F128 */ func_iPTR_f32,
+/* FPEXT_F32_F64 */ f64_func_f32,
+/* FPEXT_F16_F32 */ f32_func_i16,
+/* FPROUND_F32_F16 */ i16_func_f32,
+/* FPROUND_F64_F16 */ unsupported,
+/* FPROUND_F80_F16 */ unsupported,
+/* FPROUND_F128_F16 */ unsupported,
+/* FPROUND_PPCF128_F16 */ unsupported,
+/* FPROUND_F64_F32 */ f32_func_f64,
+/* FPROUND_F80_F32 */ unsupported,
+/* FPROUND_F128_F32 */ f32_func_i64_i64,
+/* FPROUND_PPCF128_F32 */ unsupported,
+/* FPROUND_F80_F64 */ unsupported,
+/* FPROUND_F128_F64 */ f64_func_i64_i64,
+/* FPROUND_PPCF128_F64 */ unsupported,
+/* FPTOSINT_F32_I32 */ i32_func_f32,
+/* FPTOSINT_F32_I64 */ i64_func_f32,
+/* FPTOSINT_F32_I128 */ i64_i64_func_f32,
+/* FPTOSINT_F64_I32 */ i32_func_f64,
+/* FPTOSINT_F64_I64 */ i64_func_f64,
+/* FPTOSINT_F64_I128 */ i64_i64_func_f64,
+/* FPTOSINT_F80_I32 */ unsupported,
+/* FPTOSINT_F80_I64 */ unsupported,
+/* FPTOSINT_F80_I128 */ unsupported,
+/* FPTOSINT_F128_I32 */ i32_func_i64_i64,
+/* FPTOSINT_F128_I64 */ i64_func_i64_i64,
+/* FPTOSINT_F128_I128 */ i64_i64_func_i64_i64,
+/* FPTOSINT_PPCF128_I32 */ unsupported,
+/* FPTOSINT_PPCF128_I64 */ unsupported,
+/* FPTOSINT_PPCF128_I128 */ unsupported,
+/* FPTOUINT_F32_I32 */ i32_func_f32,
+/* FPTOUINT_F32_I64 */ i64_func_f32,
+/* FPTOUINT_F32_I128 */ i64_i64_func_f32,
+/* FPTOUINT_F64_I32 */ i32_func_f64,
+/* FPTOUINT_F64_I64 */ i64_func_f64,
+/* FPTOUINT_F64_I128 */ i64_i64_func_f64,
+/* FPTOUINT_F80_I32 */ unsupported,
+/* FPTOUINT_F80_I64 */ unsupported,
+/* FPTOUINT_F80_I128 */ unsupported,
+/* FPTOUINT_F128_I32 */ i32_func_i64_i64,
+/* FPTOUINT_F128_I64 */ i64_func_i64_i64,
+/* FPTOUINT_F128_I128 */ i64_i64_func_i64_i64,
+/* FPTOUINT_PPCF128_I32 */ unsupported,
+/* FPTOUINT_PPCF128_I64 */ unsupported,
+/* FPTOUINT_PPCF128_I128 */ unsupported,
+/* SINTTOFP_I32_F32 */ f32_func_i32,
+/* SINTTOFP_I32_F64 */ f64_func_i32,
+/* SINTTOFP_I32_F80 */ unsupported,
+/* SINTTOFP_I32_F128 */ func_iPTR_i32,
+/* SINTTOFP_I32_PPCF128 */ unsupported,
+/* SINTTOFP_I64_F32 */ f32_func_i64,
+/* SINTTOFP_I64_F64 */ f64_func_i64,
+/* SINTTOFP_I64_F80 */ unsupported,
+/* SINTTOFP_I64_F128 */ func_iPTR_i64,
+/* SINTTOFP_I64_PPCF128 */ unsupported,
+/* SINTTOFP_I128_F32 */ f32_func_i64_i64,
+/* SINTTOFP_I128_F64 */ f64_func_i64_i64,
+/* SINTTOFP_I128_F80 */ unsupported,
+/* SINTTOFP_I128_F128 */ func_iPTR_i64_i64,
+/* SINTTOFP_I128_PPCF128 */ unsupported,
+/* UINTTOFP_I32_F32 */ f32_func_i32,
+/* UINTTOFP_I32_F64 */ f64_func_i64,
+/* UINTTOFP_I32_F80 */ unsupported,
+/* UINTTOFP_I32_F128 */ func_iPTR_i32,
+/* UINTTOFP_I32_PPCF128 */ unsupported,
+/* UINTTOFP_I64_F32 */ f32_func_i64,
+/* UINTTOFP_I64_F64 */ f64_func_i64,
+/* UINTTOFP_I64_F80 */ unsupported,
+/* UINTTOFP_I64_F128 */ func_iPTR_i64,
+/* UINTTOFP_I64_PPCF128 */ unsupported,
+/* UINTTOFP_I128_F32 */ f32_func_i64_i64,
+/* UINTTOFP_I128_F64 */ f64_func_i64_i64,
+/* UINTTOFP_I128_F80 */ unsupported,
+/* UINTTOFP_I128_F128 */ func_iPTR_i64_i64,
+/* UINTTOFP_I128_PPCF128 */ unsupported,
+
+// COMPARISON
+/* OEQ_F32 */ i32_func_f32_f32,
+/* OEQ_F64 */ i32_func_f64_f64,
+/* OEQ_F128 */ i32_func_i64_i64_i64_i64,
+/* OEQ_PPCF128 */ unsupported,
+/* UNE_F32 */ i32_func_f32_f32,
+/* UNE_F64 */ i32_func_f64_f64,
+/* UNE_F128 */ i32_func_i64_i64_i64_i64,
+/* UNE_PPCF128 */ unsupported,
+/* OGE_F32 */ i32_func_f32_f32,
+/* OGE_F64 */ i32_func_f64_f64,
+/* OGE_F128 */ i32_func_i64_i64_i64_i64,
+/* OGE_PPCF128 */ unsupported,
+/* OLT_F32 */ i32_func_f32_f32,
+/* OLT_F64 */ i32_func_f64_f64,
+/* OLT_F128 */ i32_func_i64_i64_i64_i64,
+/* OLT_PPCF128 */ unsupported,
+/* OLE_F32 */ i32_func_f32_f32,
+/* OLE_F64 */ i32_func_f64_f64,
+/* OLE_F128 */ i32_func_i64_i64_i64_i64,
+/* OLE_PPCF128 */ unsupported,
+/* OGT_F32 */ i32_func_f32_f32,
+/* OGT_F64 */ i32_func_f64_f64,
+/* OGT_F128 */ i32_func_i64_i64_i64_i64,
+/* OGT_PPCF128 */ unsupported,
+/* UO_F32 */ i32_func_f32_f32,
+/* UO_F64 */ i32_func_f64_f64,
+/* UO_F128 */ i32_func_i64_i64_i64_i64,
+/* UO_PPCF128 */ unsupported,
+/* O_F32 */ i32_func_f32_f32,
+/* O_F64 */ i32_func_f64_f64,
+/* O_F128 */ i32_func_i64_i64_i64_i64,
+/* O_PPCF128 */ unsupported,
+
+// MEMORY
+/* MEMCPY */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMSET */ iPTR_func_iPTR_i32_iPTR,
+/* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR,
+
+// ELEMENT-WISE ATOMIC MEMORY
+/* MEMCPY_ELEMENT_ATOMIC_1 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_2 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_4 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_8 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_16 */ iPTR_func_iPTR_iPTR_iPTR,
+
+// EXCEPTION HANDLING
+/* UNWIND_RESUME */ unsupported,
+
+// Note: there are two sets of atomics libcalls; see
+// <http://llvm.org/docs/Atomics.html> for more info on the
+// difference between them.
+
+// Atomic '__sync_*' libcalls.
+/* SYNC_VAL_COMPARE_AND_SWAP_1 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_2 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_4 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_8 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_16 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_1 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_2 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_4 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_8 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_16 */ unsupported,
+/* SYNC_FETCH_AND_ADD_1 */ unsupported,
+/* SYNC_FETCH_AND_ADD_2 */ unsupported,
+/* SYNC_FETCH_AND_ADD_4 */ unsupported,
+/* SYNC_FETCH_AND_ADD_8 */ unsupported,
+/* SYNC_FETCH_AND_ADD_16 */ unsupported,
+/* SYNC_FETCH_AND_SUB_1 */ unsupported,
+/* SYNC_FETCH_AND_SUB_2 */ unsupported,
+/* SYNC_FETCH_AND_SUB_4 */ unsupported,
+/* SYNC_FETCH_AND_SUB_8 */ unsupported,
+/* SYNC_FETCH_AND_SUB_16 */ unsupported,
+/* SYNC_FETCH_AND_AND_1 */ unsupported,
+/* SYNC_FETCH_AND_AND_2 */ unsupported,
+/* SYNC_FETCH_AND_AND_4 */ unsupported,
+/* SYNC_FETCH_AND_AND_8 */ unsupported,
+/* SYNC_FETCH_AND_AND_16 */ unsupported,
+/* SYNC_FETCH_AND_OR_1 */ unsupported,
+/* SYNC_FETCH_AND_OR_2 */ unsupported,
+/* SYNC_FETCH_AND_OR_4 */ unsupported,
+/* SYNC_FETCH_AND_OR_8 */ unsupported,
+/* SYNC_FETCH_AND_OR_16 */ unsupported,
+/* SYNC_FETCH_AND_XOR_1 */ unsupported,
+/* SYNC_FETCH_AND_XOR_2 */ unsupported,
+/* SYNC_FETCH_AND_XOR_4 */ unsupported,
+/* SYNC_FETCH_AND_XOR_8 */ unsupported,
+/* SYNC_FETCH_AND_XOR_16 */ unsupported,
+/* SYNC_FETCH_AND_NAND_1 */ unsupported,
+/* SYNC_FETCH_AND_NAND_2 */ unsupported,
+/* SYNC_FETCH_AND_NAND_4 */ unsupported,
+/* SYNC_FETCH_AND_NAND_8 */ unsupported,
+/* SYNC_FETCH_AND_NAND_16 */ unsupported,
+/* SYNC_FETCH_AND_MAX_1 */ unsupported,
+/* SYNC_FETCH_AND_MAX_2 */ unsupported,
+/* SYNC_FETCH_AND_MAX_4 */ unsupported,
+/* SYNC_FETCH_AND_MAX_8 */ unsupported,
+/* SYNC_FETCH_AND_MAX_16 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_1 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_2 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_4 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_8 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_16 */ unsupported,
+/* SYNC_FETCH_AND_MIN_1 */ unsupported,
+/* SYNC_FETCH_AND_MIN_2 */ unsupported,
+/* SYNC_FETCH_AND_MIN_4 */ unsupported,
+/* SYNC_FETCH_AND_MIN_8 */ unsupported,
+/* SYNC_FETCH_AND_MIN_16 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_1 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_2 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_4 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_8 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_16 */ unsupported,
+
+// Atomic '__atomic_*' libcalls.
+/* ATOMIC_LOAD */ unsupported,
+/* ATOMIC_LOAD_1 */ unsupported,
+/* ATOMIC_LOAD_2 */ unsupported,
+/* ATOMIC_LOAD_4 */ unsupported,
+/* ATOMIC_LOAD_8 */ unsupported,
+/* ATOMIC_LOAD_16 */ unsupported,
+
+/* ATOMIC_STORE */ unsupported,
+/* ATOMIC_STORE_1 */ unsupported,
+/* ATOMIC_STORE_2 */ unsupported,
+/* ATOMIC_STORE_4 */ unsupported,
+/* ATOMIC_STORE_8 */ unsupported,
+/* ATOMIC_STORE_16 */ unsupported,
+
+/* ATOMIC_EXCHANGE */ unsupported,
+/* ATOMIC_EXCHANGE_1 */ unsupported,
+/* ATOMIC_EXCHANGE_2 */ unsupported,
+/* ATOMIC_EXCHANGE_4 */ unsupported,
+/* ATOMIC_EXCHANGE_8 */ unsupported,
+/* ATOMIC_EXCHANGE_16 */ unsupported,
+
+/* ATOMIC_COMPARE_EXCHANGE */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_1 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_2 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_4 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_8 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_16 */ unsupported,
+
+/* ATOMIC_FETCH_ADD_1 */ unsupported,
+/* ATOMIC_FETCH_ADD_2 */ unsupported,
+/* ATOMIC_FETCH_ADD_4 */ unsupported,
+/* ATOMIC_FETCH_ADD_8 */ unsupported,
+/* ATOMIC_FETCH_ADD_16 */ unsupported,
+
+/* ATOMIC_FETCH_SUB_1 */ unsupported,
+/* ATOMIC_FETCH_SUB_2 */ unsupported,
+/* ATOMIC_FETCH_SUB_4 */ unsupported,
+/* ATOMIC_FETCH_SUB_8 */ unsupported,
+/* ATOMIC_FETCH_SUB_16 */ unsupported,
+
+/* ATOMIC_FETCH_AND_1 */ unsupported,
+/* ATOMIC_FETCH_AND_2 */ unsupported,
+/* ATOMIC_FETCH_AND_4 */ unsupported,
+/* ATOMIC_FETCH_AND_8 */ unsupported,
+/* ATOMIC_FETCH_AND_16 */ unsupported,
+
+/* ATOMIC_FETCH_OR_1 */ unsupported,
+/* ATOMIC_FETCH_OR_2 */ unsupported,
+/* ATOMIC_FETCH_OR_4 */ unsupported,
+/* ATOMIC_FETCH_OR_8 */ unsupported,
+/* ATOMIC_FETCH_OR_16 */ unsupported,
+
+/* ATOMIC_FETCH_XOR_1 */ unsupported,
+/* ATOMIC_FETCH_XOR_2 */ unsupported,
+/* ATOMIC_FETCH_XOR_4 */ unsupported,
+/* ATOMIC_FETCH_XOR_8 */ unsupported,
+/* ATOMIC_FETCH_XOR_16 */ unsupported,
+
+/* ATOMIC_FETCH_NAND_1 */ unsupported,
+/* ATOMIC_FETCH_NAND_2 */ unsupported,
+/* ATOMIC_FETCH_NAND_4 */ unsupported,
+/* ATOMIC_FETCH_NAND_8 */ unsupported,
+/* ATOMIC_FETCH_NAND_16 */ unsupported,
+
+// Stack Protector Fail.
+/* STACKPROTECTOR_CHECK_FAIL */ func,
+
+// Deoptimization.
+/* DEOPTIMIZE */ unsupported,
+
+};
+
+static const char *
+RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = {
+/* SHL_I16 */ "__ashlhi3",
+/* SHL_I32 */ "__ashlsi3",
+/* SHL_I64 */ "__ashldi3",
+/* SHL_I128 */ "__ashlti3",
+/* SRL_I16 */ "__lshrhi3",
+/* SRL_I32 */ "__lshrsi3",
+/* SRL_I64 */ "__lshrdi3",
+/* SRL_I128 */ "__lshrti3",
+/* SRA_I16 */ "__ashrhi3",
+/* SRA_I32 */ "__ashrsi3",
+/* SRA_I64 */ "__ashrdi3",
+/* SRA_I128 */ "__ashrti3",
+/* MUL_I8 */ "__mulqi3",
+/* MUL_I16 */ "__mulhi3",
+/* MUL_I32 */ "__mulsi3",
+/* MUL_I64 */ "__muldi3",
+/* MUL_I128 */ "__multi3",
+/* MULO_I32 */ "__mulosi4",
+/* MULO_I64 */ "__mulodi4",
+/* MULO_I128 */ "__muloti4",
+/* SDIV_I8 */ "__divqi3",
+/* SDIV_I16 */ "__divhi3",
+/* SDIV_I32 */ "__divsi3",
+/* SDIV_I64 */ "__divdi3",
+/* SDIV_I128 */ "__divti3",
+/* UDIV_I8 */ "__udivqi3",
+/* UDIV_I16 */ "__udivhi3",
+/* UDIV_I32 */ "__udivsi3",
+/* UDIV_I64 */ "__udivdi3",
+/* UDIV_I128 */ "__udivti3",
+/* SREM_I8 */ "__modqi3",
+/* SREM_I16 */ "__modhi3",
+/* SREM_I32 */ "__modsi3",
+/* SREM_I64 */ "__moddi3",
+/* SREM_I128 */ "__modti3",
+/* UREM_I8 */ "__umodqi3",
+/* UREM_I16 */ "__umodhi3",
+/* UREM_I32 */ "__umodsi3",
+/* UREM_I64 */ "__umoddi3",
+/* UREM_I128 */ "__umodti3",
+/* SDIVREM_I8 */ nullptr,
+/* SDIVREM_I16 */ nullptr,
+/* SDIVREM_I32 */ nullptr,
+/* SDIVREM_I64 */ nullptr,
+/* SDIVREM_I128 */ nullptr,
+/* UDIVREM_I8 */ nullptr,
+/* UDIVREM_I16 */ nullptr,
+/* UDIVREM_I32 */ nullptr,
+/* UDIVREM_I64 */ nullptr,
+/* UDIVREM_I128 */ nullptr,
+/* NEG_I32 */ "__negsi2",
+/* NEG_I64 */ "__negdi2",
+/* ADD_F32 */ "__addsf3",
+/* ADD_F64 */ "__adddf3",
+/* ADD_F80 */ nullptr,
+/* ADD_F128 */ "__addtf3",
+/* ADD_PPCF128 */ nullptr,
+/* SUB_F32 */ "__subsf3",
+/* SUB_F64 */ "__subdf3",
+/* SUB_F80 */ nullptr,
+/* SUB_F128 */ "__subtf3",
+/* SUB_PPCF128 */ nullptr,
+/* MUL_F32 */ "__mulsf3",
+/* MUL_F64 */ "__muldf3",
+/* MUL_F80 */ nullptr,
+/* MUL_F128 */ "__multf3",
+/* MUL_PPCF128 */ nullptr,
+/* DIV_F32 */ "__divsf3",
+/* DIV_F64 */ "__divdf3",
+/* DIV_F80 */ nullptr,
+/* DIV_F128 */ "__divtf3",
+/* DIV_PPCF128 */ nullptr,
+/* REM_F32 */ "fmodf",
+/* REM_F64 */ "fmod",
+/* REM_F80 */ nullptr,
+/* REM_F128 */ "fmodl",
+/* REM_PPCF128 */ nullptr,
+/* FMA_F32 */ "fmaf",
+/* FMA_F64 */ "fma",
+/* FMA_F80 */ nullptr,
+/* FMA_F128 */ "fmal",
+/* FMA_PPCF128 */ nullptr,
+/* POWI_F32 */ "__powisf2",
+/* POWI_F64 */ "__powidf2",
+/* POWI_F80 */ nullptr,
+/* POWI_F128 */ "__powitf2",
+/* POWI_PPCF128 */ nullptr,
+/* SQRT_F32 */ "sqrtf",
+/* SQRT_F64 */ "sqrt",
+/* SQRT_F80 */ nullptr,
+/* SQRT_F128 */ "sqrtl",
+/* SQRT_PPCF128 */ nullptr,
+/* LOG_F32 */ "logf",
+/* LOG_F64 */ "log",
+/* LOG_F80 */ nullptr,
+/* LOG_F128 */ "logl",
+/* LOG_PPCF128 */ nullptr,
+/* LOG2_F32 */ "log2f",
+/* LOG2_F64 */ "log2",
+/* LOG2_F80 */ nullptr,
+/* LOG2_F128 */ "log2l",
+/* LOG2_PPCF128 */ nullptr,
+/* LOG10_F32 */ "log10f",
+/* LOG10_F64 */ "log10",
+/* LOG10_F80 */ nullptr,
+/* LOG10_F128 */ "log10l",
+/* LOG10_PPCF128 */ nullptr,
+/* EXP_F32 */ "expf",
+/* EXP_F64 */ "exp",
+/* EXP_F80 */ nullptr,
+/* EXP_F128 */ "expl",
+/* EXP_PPCF128 */ nullptr,
+/* EXP2_F32 */ "exp2f",
+/* EXP2_F64 */ "exp2",
+/* EXP2_F80 */ nullptr,
+/* EXP2_F128 */ "exp2l",
+/* EXP2_PPCF128 */ nullptr,
+/* SIN_F32 */ "sinf",
+/* SIN_F64 */ "sin",
+/* SIN_F80 */ nullptr,
+/* SIN_F128 */ "sinl",
+/* SIN_PPCF128 */ nullptr,
+/* COS_F32 */ "cosf",
+/* COS_F64 */ "cos",
+/* COS_F80 */ nullptr,
+/* COS_F128 */ "cosl",
+/* COS_PPCF128 */ nullptr,
+/* SINCOS_F32 */ "sincosf",
+/* SINCOS_F64 */ "sincos",
+/* SINCOS_F80 */ nullptr,
+/* SINCOS_F128 */ "sincosl",
+/* SINCOS_PPCF128 */ nullptr,
+/* POW_F32 */ "powf",
+/* POW_F64 */ "pow",
+/* POW_F80 */ nullptr,
+/* POW_F128 */ "powl",
+/* POW_PPCF128 */ nullptr,
+/* CEIL_F32 */ "ceilf",
+/* CEIL_F64 */ "ceil",
+/* CEIL_F80 */ nullptr,
+/* CEIL_F128 */ "ceill",
+/* CEIL_PPCF128 */ nullptr,
+/* TRUNC_F32 */ "truncf",
+/* TRUNC_F64 */ "trunc",
+/* TRUNC_F80 */ nullptr,
+/* TRUNC_F128 */ "truncl",
+/* TRUNC_PPCF128 */ nullptr,
+/* RINT_F32 */ "rintf",
+/* RINT_F64 */ "rint",
+/* RINT_F80 */ nullptr,
+/* RINT_F128 */ "rintl",
+/* RINT_PPCF128 */ nullptr,
+/* NEARBYINT_F32 */ "nearbyintf",
+/* NEARBYINT_F64 */ "nearbyint",
+/* NEARBYINT_F80 */ nullptr,
+/* NEARBYINT_F128 */ "nearbyintl",
+/* NEARBYINT_PPCF128 */ nullptr,
+/* ROUND_F32 */ "roundf",
+/* ROUND_F64 */ "round",
+/* ROUND_F80 */ nullptr,
+/* ROUND_F128 */ "roundl",
+/* ROUND_PPCF128 */ nullptr,
+/* FLOOR_F32 */ "floorf",
+/* FLOOR_F64 */ "floor",
+/* FLOOR_F80 */ nullptr,
+/* FLOOR_F128 */ "floorl",
+/* FLOOR_PPCF128 */ nullptr,
+/* COPYSIGN_F32 */ "copysignf",
+/* COPYSIGN_F64 */ "copysign",
+/* COPYSIGN_F80 */ nullptr,
+/* COPYSIGN_F128 */ "copysignl",
+/* COPYSIGN_PPCF128 */ nullptr,
+/* FMIN_F32 */ "fminf",
+/* FMIN_F64 */ "fmin",
+/* FMIN_F80 */ nullptr,
+/* FMIN_F128 */ "fminl",
+/* FMIN_PPCF128 */ nullptr,
+/* FMAX_F32 */ "fmaxf",
+/* FMAX_F64 */ "fmax",
+/* FMAX_F80 */ nullptr,
+/* FMAX_F128 */ "fmaxl",
+/* FMAX_PPCF128 */ nullptr,
+/* FPEXT_F32_PPCF128 */ nullptr,
+/* FPEXT_F64_PPCF128 */ nullptr,
+/* FPEXT_F64_F128 */ "__extenddftf2",
+/* FPEXT_F32_F128 */ "__extendsftf2",
+/* FPEXT_F32_F64 */ "__extendsfdf2",
+/* FPEXT_F16_F32 */ "__gnu_h2f_ieee",
+/* FPROUND_F32_F16 */ "__gnu_f2h_ieee",
+/* FPROUND_F64_F16 */ nullptr,
+/* FPROUND_F80_F16 */ nullptr,
+/* FPROUND_F128_F16 */ nullptr,
+/* FPROUND_PPCF128_F16 */ nullptr,
+/* FPROUND_F64_F32 */ "__truncdfsf2",
+/* FPROUND_F80_F32 */ "__truncxfsf2",
+/* FPROUND_F128_F32 */ "__trunctfsf2",
+/* FPROUND_PPCF128_F32 */ nullptr,
+/* FPROUND_F80_F64 */ "__truncxfdf2",
+/* FPROUND_F128_F64 */ "__trunctfdf2",
+/* FPROUND_PPCF128_F64 */ nullptr,
+/* FPTOSINT_F32_I32 */ "__fixsfsi",
+/* FPTOSINT_F32_I64 */ "__fixsfdi",
+/* FPTOSINT_F32_I128 */ "__fixsfti",
+/* FPTOSINT_F64_I32 */ "__fixdfsi",
+/* FPTOSINT_F64_I64 */ "__fixdfdi",
+/* FPTOSINT_F64_I128 */ "__fixdfti",
+/* FPTOSINT_F80_I32 */ "__fixxfsi",
+/* FPTOSINT_F80_I64 */ "__fixxfdi",
+/* FPTOSINT_F80_I128 */ "__fixxfti",
+/* FPTOSINT_F128_I32 */ "__fixtfsi",
+/* FPTOSINT_F128_I64 */ "__fixtfdi",
+/* FPTOSINT_F128_I128 */ "__fixtfti",
+/* FPTOSINT_PPCF128_I32 */ nullptr,
+/* FPTOSINT_PPCF128_I64 */ nullptr,
+/* FPTOSINT_PPCF128_I128 */ nullptr,
+/* FPTOUINT_F32_I32 */ "__fixunssfsi",
+/* FPTOUINT_F32_I64 */ "__fixunssfdi",
+/* FPTOUINT_F32_I128 */ "__fixunssfti",
+/* FPTOUINT_F64_I32 */ "__fixunsdfsi",
+/* FPTOUINT_F64_I64 */ "__fixunsdfdi",
+/* FPTOUINT_F64_I128 */ "__fixunsdfti",
+/* FPTOUINT_F80_I32 */ "__fixunsxfsi",
+/* FPTOUINT_F80_I64 */ "__fixunsxfdi",
+/* FPTOUINT_F80_I128 */ "__fixunsxfti",
+/* FPTOUINT_F128_I32 */ "__fixunstfsi",
+/* FPTOUINT_F128_I64 */ "__fixunstfdi",
+/* FPTOUINT_F128_I128 */ "__fixunstfti",
+/* FPTOUINT_PPCF128_I32 */ nullptr,
+/* FPTOUINT_PPCF128_I64 */ nullptr,
+/* FPTOUINT_PPCF128_I128 */ nullptr,
+/* SINTTOFP_I32_F32 */ "__floatsisf",
+/* SINTTOFP_I32_F64 */ "__floatsidf",
+/* SINTTOFP_I32_F80 */ nullptr,
+/* SINTTOFP_I32_F128 */ "__floatsitf",
+/* SINTTOFP_I32_PPCF128 */ nullptr,
+/* SINTTOFP_I64_F32 */ "__floatdisf",
+/* SINTTOFP_I64_F64 */ "__floatdidf",
+/* SINTTOFP_I64_F80 */ nullptr,
+/* SINTTOFP_I64_F128 */ "__floatditf",
+/* SINTTOFP_I64_PPCF128 */ nullptr,
+/* SINTTOFP_I128_F32 */ "__floattisf",
+/* SINTTOFP_I128_F64 */ "__floattidf",
+/* SINTTOFP_I128_F80 */ nullptr,
+/* SINTTOFP_I128_F128 */ "__floattitf",
+/* SINTTOFP_I128_PPCF128 */ nullptr,
+/* UINTTOFP_I32_F32 */ "__floatunsisf",
+/* UINTTOFP_I32_F64 */ "__floatunsidf",
+/* UINTTOFP_I32_F80 */ nullptr,
+/* UINTTOFP_I32_F128 */ "__floatunsitf",
+/* UINTTOFP_I32_PPCF128 */ nullptr,
+/* UINTTOFP_I64_F32 */ "__floatundisf",
+/* UINTTOFP_I64_F64 */ "__floatundidf",
+/* UINTTOFP_I64_F80 */ nullptr,
+/* UINTTOFP_I64_F128 */ "__floatunditf",
+/* UINTTOFP_I64_PPCF128 */ nullptr,
+/* UINTTOFP_I128_F32 */ "__floatuntisf",
+/* UINTTOFP_I128_F64 */ "__floatuntidf",
+/* UINTTOFP_I128_F80 */ nullptr,
+/* UINTTOFP_I128_F128 */ "__floatuntitf",
+/* UINTTOFP_I128_PPCF128 */ nullptr,
+/* OEQ_F32 */ "__eqsf2",
+/* OEQ_F64 */ "__eqdf2",
+/* OEQ_F128 */ "__eqtf2",
+/* OEQ_PPCF128 */ nullptr,
+/* UNE_F32 */ "__nesf2",
+/* UNE_F64 */ "__nedf2",
+/* UNE_F128 */ "__netf2",
+/* UNE_PPCF128 */ nullptr,
+/* OGE_F32 */ "__gesf2",
+/* OGE_F64 */ "__gedf2",
+/* OGE_F128 */ "__getf2",
+/* OGE_PPCF128 */ nullptr,
+/* OLT_F32 */ "__ltsf2",
+/* OLT_F64 */ "__ltdf2",
+/* OLT_F128 */ "__lttf2",
+/* OLT_PPCF128 */ nullptr,
+/* OLE_F32 */ "__lesf2",
+/* OLE_F64 */ "__ledf2",
+/* OLE_F128 */ "__letf2",
+/* OLE_PPCF128 */ nullptr,
+/* OGT_F32 */ "__gtsf2",
+/* OGT_F64 */ "__gtdf2",
+/* OGT_F128 */ "__gttf2",
+/* OGT_PPCF128 */ nullptr,
+/* UO_F32 */ "__unordsf2",
+/* UO_F64 */ "__unorddf2",
+/* UO_F128 */ "__unordtf2",
+/* UO_PPCF128 */ nullptr,
+/* O_F32 */ "__unordsf2",
+/* O_F64 */ "__unorddf2",
+/* O_F128 */ "__unordtf2",
+/* O_PPCF128 */ nullptr,
+/* MEMCPY */ "memcpy",
+/* MEMMOVE */ "memset",
+/* MEMSET */ "memmove",
+/* MEMCPY_ELEMENT_ATOMIC_1 */ "MEMCPY_ELEMENT_ATOMIC_1",
+/* MEMCPY_ELEMENT_ATOMIC_2 */ "MEMCPY_ELEMENT_ATOMIC_2",
+/* MEMCPY_ELEMENT_ATOMIC_4 */ "MEMCPY_ELEMENT_ATOMIC_4",
+/* MEMCPY_ELEMENT_ATOMIC_8 */ "MEMCPY_ELEMENT_ATOMIC_8",
+/* MEMCPY_ELEMENT_ATOMIC_16 */ "MEMCPY_ELEMENT_ATOMIC_16",
+/* UNWIND_RESUME */ "_Unwind_Resume",
+/* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1",
+/* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2",
+/* SYNC_VAL_COMPARE_AND_SWAP_4 */ "__sync_val_compare_and_swap_4",
+/* SYNC_VAL_COMPARE_AND_SWAP_8 */ "__sync_val_compare_and_swap_8",
+/* SYNC_VAL_COMPARE_AND_SWAP_16 */ "__sync_val_compare_and_swap_16",
+/* SYNC_LOCK_TEST_AND_SET_1 */ "__sync_lock_test_and_set_1",
+/* SYNC_LOCK_TEST_AND_SET_2 */ "__sync_lock_test_and_set_2",
+/* SYNC_LOCK_TEST_AND_SET_4 */ "__sync_lock_test_and_set_4",
+/* SYNC_LOCK_TEST_AND_SET_8 */ "__sync_lock_test_and_set_8",
+/* SYNC_LOCK_TEST_AND_SET_16 */ "__sync_lock_test_and_set_16",
+/* SYNC_FETCH_AND_ADD_1 */ "__sync_fetch_and_add_1",
+/* SYNC_FETCH_AND_ADD_2 */ "__sync_fetch_and_add_2",
+/* SYNC_FETCH_AND_ADD_4 */ "__sync_fetch_and_add_4",
+/* SYNC_FETCH_AND_ADD_8 */ "__sync_fetch_and_add_8",
+/* SYNC_FETCH_AND_ADD_16 */ "__sync_fetch_and_add_16",
+/* SYNC_FETCH_AND_SUB_1 */ "__sync_fetch_and_sub_1",
+/* SYNC_FETCH_AND_SUB_2 */ "__sync_fetch_and_sub_2",
+/* SYNC_FETCH_AND_SUB_4 */ "__sync_fetch_and_sub_4",
+/* SYNC_FETCH_AND_SUB_8 */ "__sync_fetch_and_sub_8",
+/* SYNC_FETCH_AND_SUB_16 */ "__sync_fetch_and_sub_16",
+/* SYNC_FETCH_AND_AND_1 */ "__sync_fetch_and_and_1",
+/* SYNC_FETCH_AND_AND_2 */ "__sync_fetch_and_and_2",
+/* SYNC_FETCH_AND_AND_4 */ "__sync_fetch_and_and_4",
+/* SYNC_FETCH_AND_AND_8 */ "__sync_fetch_and_and_8",
+/* SYNC_FETCH_AND_AND_16 */ "__sync_fetch_and_and_16",
+/* SYNC_FETCH_AND_OR_1 */ "__sync_fetch_and_or_1",
+/* SYNC_FETCH_AND_OR_2 */ "__sync_fetch_and_or_2",
+/* SYNC_FETCH_AND_OR_4 */ "__sync_fetch_and_or_4",
+/* SYNC_FETCH_AND_OR_8 */ "__sync_fetch_and_or_8",
+/* SYNC_FETCH_AND_OR_16 */ "__sync_fetch_and_or_16",
+/* SYNC_FETCH_AND_XOR_1 */ "__sync_fetch_and_xor_1",
+/* SYNC_FETCH_AND_XOR_2 */ "__sync_fetch_and_xor_2",
+/* SYNC_FETCH_AND_XOR_4 */ "__sync_fetch_and_xor_4",
+/* SYNC_FETCH_AND_XOR_8 */ "__sync_fetch_and_xor_8",
+/* SYNC_FETCH_AND_XOR_16 */ "__sync_fetch_and_xor_16",
+/* SYNC_FETCH_AND_NAND_1 */ "__sync_fetch_and_nand_1",
+/* SYNC_FETCH_AND_NAND_2 */ "__sync_fetch_and_nand_2",
+/* SYNC_FETCH_AND_NAND_4 */ "__sync_fetch_and_nand_4",
+/* SYNC_FETCH_AND_NAND_8 */ "__sync_fetch_and_nand_8",
+/* SYNC_FETCH_AND_NAND_16 */ "__sync_fetch_and_nand_16",
+/* SYNC_FETCH_AND_MAX_1 */ "__sync_fetch_and_max_1",
+/* SYNC_FETCH_AND_MAX_2 */ "__sync_fetch_and_max_2",
+/* SYNC_FETCH_AND_MAX_4 */ "__sync_fetch_and_max_4",
+/* SYNC_FETCH_AND_MAX_8 */ "__sync_fetch_and_max_8",
+/* SYNC_FETCH_AND_MAX_16 */ "__sync_fetch_and_max_16",
+/* SYNC_FETCH_AND_UMAX_1 */ "__sync_fetch_and_umax_1",
+/* SYNC_FETCH_AND_UMAX_2 */ "__sync_fetch_and_umax_2",
+/* SYNC_FETCH_AND_UMAX_4 */ "__sync_fetch_and_umax_4",
+/* SYNC_FETCH_AND_UMAX_8 */ "__sync_fetch_and_umax_8",
+/* SYNC_FETCH_AND_UMAX_16 */ "__sync_fetch_and_umax_16",
+/* SYNC_FETCH_AND_MIN_1 */ "__sync_fetch_and_min_1",
+/* SYNC_FETCH_AND_MIN_2 */ "__sync_fetch_and_min_2",
+/* SYNC_FETCH_AND_MIN_4 */ "__sync_fetch_and_min_4",
+/* SYNC_FETCH_AND_MIN_8 */ "__sync_fetch_and_min_8",
+/* SYNC_FETCH_AND_MIN_16 */ "__sync_fetch_and_min_16",
+/* SYNC_FETCH_AND_UMIN_1 */ "__sync_fetch_and_umin_1",
+/* SYNC_FETCH_AND_UMIN_2 */ "__sync_fetch_and_umin_2",
+/* SYNC_FETCH_AND_UMIN_4 */ "__sync_fetch_and_umin_4",
+/* SYNC_FETCH_AND_UMIN_8 */ "__sync_fetch_and_umin_8",
+/* SYNC_FETCH_AND_UMIN_16 */ "__sync_fetch_and_umin_16",
+
+/* ATOMIC_LOAD */ "__atomic_load",
+/* ATOMIC_LOAD_1 */ "__atomic_load_1",
+/* ATOMIC_LOAD_2 */ "__atomic_load_2",
+/* ATOMIC_LOAD_4 */ "__atomic_load_4",
+/* ATOMIC_LOAD_8 */ "__atomic_load_8",
+/* ATOMIC_LOAD_16 */ "__atomic_load_16",
+
+/* ATOMIC_STORE */ "__atomic_store",
+/* ATOMIC_STORE_1 */ "__atomic_store_1",
+/* ATOMIC_STORE_2 */ "__atomic_store_2",
+/* ATOMIC_STORE_4 */ "__atomic_store_4",
+/* ATOMIC_STORE_8 */ "__atomic_store_8",
+/* ATOMIC_STORE_16 */ "__atomic_store_16",
+
+/* ATOMIC_EXCHANGE */ "__atomic_exchange",
+/* ATOMIC_EXCHANGE_1 */ "__atomic_exchange_1",
+/* ATOMIC_EXCHANGE_2 */ "__atomic_exchange_2",
+/* ATOMIC_EXCHANGE_4 */ "__atomic_exchange_4",
+/* ATOMIC_EXCHANGE_8 */ "__atomic_exchange_8",
+/* ATOMIC_EXCHANGE_16 */ "__atomic_exchange_16",
+
+/* ATOMIC_COMPARE_EXCHANGE */ "__atomic_compare_exchange",
+/* ATOMIC_COMPARE_EXCHANGE_1 */ "__atomic_compare_exchange_1",
+/* ATOMIC_COMPARE_EXCHANGE_2 */ "__atomic_compare_exchange_2",
+/* ATOMIC_COMPARE_EXCHANGE_4 */ "__atomic_compare_exchange_4",
+/* ATOMIC_COMPARE_EXCHANGE_8 */ "__atomic_compare_exchange_8",
+/* ATOMIC_COMPARE_EXCHANGE_16 */ "__atomic_compare_exchange_16",
+
+/* ATOMIC_FETCH_ADD_1 */ "__atomic_fetch_add_1",
+/* ATOMIC_FETCH_ADD_2 */ "__atomic_fetch_add_2",
+/* ATOMIC_FETCH_ADD_4 */ "__atomic_fetch_add_4",
+/* ATOMIC_FETCH_ADD_8 */ "__atomic_fetch_add_8",
+/* ATOMIC_FETCH_ADD_16 */ "__atomic_fetch_add_16",
+/* ATOMIC_FETCH_SUB_1 */ "__atomic_fetch_sub_1",
+/* ATOMIC_FETCH_SUB_2 */ "__atomic_fetch_sub_2",
+/* ATOMIC_FETCH_SUB_4 */ "__atomic_fetch_sub_4",
+/* ATOMIC_FETCH_SUB_8 */ "__atomic_fetch_sub_8",
+/* ATOMIC_FETCH_SUB_16 */ "__atomic_fetch_sub_16",
+/* ATOMIC_FETCH_AND_1 */ "__atomic_fetch_and_1",
+/* ATOMIC_FETCH_AND_2 */ "__atomic_fetch_and_2",
+/* ATOMIC_FETCH_AND_4 */ "__atomic_fetch_and_4",
+/* ATOMIC_FETCH_AND_8 */ "__atomic_fetch_and_8",
+/* ATOMIC_FETCH_AND_16 */ "__atomic_fetch_and_16",
+/* ATOMIC_FETCH_OR_1 */ "__atomic_fetch_or_1",
+/* ATOMIC_FETCH_OR_2 */ "__atomic_fetch_or_2",
+/* ATOMIC_FETCH_OR_4 */ "__atomic_fetch_or_4",
+/* ATOMIC_FETCH_OR_8 */ "__atomic_fetch_or_8",
+/* ATOMIC_FETCH_OR_16 */ "__atomic_fetch_or_16",
+/* ATOMIC_FETCH_XOR_1 */ "__atomic_fetch_xor_1",
+/* ATOMIC_FETCH_XOR_2 */ "__atomic_fetch_xor_2",
+/* ATOMIC_FETCH_XOR_4 */ "__atomic_fetch_xor_4",
+/* ATOMIC_FETCH_XOR_8 */ "__atomic_fetch_xor_8",
+/* ATOMIC_FETCH_XOR_16 */ "__atomic_fetch_xor_16",
+/* ATOMIC_FETCH_NAND_1 */ "__atomic_fetch_nand_1",
+/* ATOMIC_FETCH_NAND_2 */ "__atomic_fetch_nand_2",
+/* ATOMIC_FETCH_NAND_4 */ "__atomic_fetch_nand_4",
+/* ATOMIC_FETCH_NAND_8 */ "__atomic_fetch_nand_8",
+/* ATOMIC_FETCH_NAND_16 */ "__atomic_fetch_nand_16",
+
+/* STACKPROTECTOR_CHECK_FAIL */ "__stack_chk_fail",
+
+/* DEOPTIMIZE */ "__llvm_deoptimize",
+};
+
+void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC, SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
+ assert(Rets.empty());
+ assert(Params.empty());
+
+ WebAssembly::ExprType iPTR = Subtarget.hasAddr64() ?
+ WebAssembly::ExprType::I64 :
+ WebAssembly::ExprType::I32;
+
+ switch (RuntimeLibcallSignatures[LC]) {
+ case func:
+ break;
+ case f32_func_f32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f32_func_f64:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case f32_func_i32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f32_func_i64:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case f32_func_i16:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f64_func_f32:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f64_func_f64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case f64_func_i32:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f64_func_i64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i32_func_f32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i32_func_f64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i32_func_i32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i64_func_f32:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i64_func_f64:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i64_func_i64:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case f32_func_f32_f32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f32_func_f32_i32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f32_func_i64_i64:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case f64_func_f64_f64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case f64_func_f64_i32:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f64_func_i64_i64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i16_func_f32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i8_func_i8_i8:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case func_f32_iPTR_iPTR:
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case func_f64_iPTR_iPTR:
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case i16_func_i16_i16:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i32_func_f32_f32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i32_func_f64_f64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i32_func_i32_i32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i64_func_i64_i64:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_func_f32:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i64_i64_func_f64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i16_i16_func_i16_i16:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I32);
+ Rets.push_back(wasm::ValType::I32);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i32_i32_func_i32_i32:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I32);
+ Rets.push_back(wasm::ValType::I32);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i64_i64_func_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_func_i64_i64_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_i64_i64_func_i64_i64_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_func_i64_i64_i32:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case iPTR_func_iPTR_i32_iPTR:
+ Rets.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case iPTR_func_iPTR_iPTR_iPTR:
+ Rets.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case f32_func_f32_f32_f32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f64_func_f64_f64_f64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case func_i64_i64_iPTR_iPTR:
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case func_iPTR_f32:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case func_iPTR_f64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case func_iPTR_i32:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case func_iPTR_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case func_iPTR_i64_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case func_iPTR_i64_i64_i64_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case func_iPTR_i64_i64_i64_i64_i64_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i32_func_i64_i64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i32_func_i64_i64_i64_i64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case unsupported:
+ llvm_unreachable("unsupported runtime library signature");
+ }
+}
+
+void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
+ assert(strcmp(RuntimeLibcallNames[RTLIB::DEOPTIMIZE], "__llvm_deoptimize") ==
+ 0);
+
+ for (size_t i = 0, e = RTLIB::UNKNOWN_LIBCALL; i < e; ++i)
+ if (RuntimeLibcallNames[i] && strcmp(RuntimeLibcallNames[i], Name) == 0)
+ return GetSignature(Subtarget, RTLIB::Libcall(i), Rets, Params);
+
+ llvm_unreachable("unexpected runtime library name");
+}
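
As a hedged aside, this is roughly how a caller would use the two helpers defined above; the enclosing code and the particular libcall chosen are illustrative assumptions, not part of this patch:

  // Sketch only: look up the wasm signature of a runtime libcall.
  SmallVector<wasm::ValType, 1> Rets;
  SmallVector<wasm::ValType, 4> Params;
  GetSignature(Subtarget, RTLIB::FMA_F64, Rets, Params);  // Subtarget assumed in scope
  // Assuming RTLIB::FMA_F64 maps to the f64_func_f64_f64_f64 entry above,
  // this leaves Rets = {F64} and Params = {F64, F64, F64}. The string-keyed
  // overload resolves names such as "__atomic_fetch_add_4" the same way,
  // via a linear scan of RuntimeLibcallNames.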
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
new file mode 100644
index 000000000000..129067604784
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -0,0 +1,37 @@
+// WebAssemblyRuntimeLibcallSignatures.h - R.T. Lib. Call Signatures -*- C++ -*--//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file provides signature information for runtime libcalls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_RUNTIME_LIBCALL_SIGNATURES_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_RUNTIME_LIBCALL_SIGNATURES_H
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+
+namespace llvm {
+
+class WebAssemblySubtarget;
+
+extern void GetSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
+
+extern void GetSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name, SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
index 34ec6f2d34a7..a9aa781610ce 100644
--- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -154,7 +154,7 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
if (!callReturnsInput)
return false;
- LibFunc::Func Func;
+ LibFunc Func;
if (!LibInfo.getLibFunc(Name, Func))
return false;
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index f5ef35a2ad40..44c794ef5da1 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -74,13 +74,25 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
: "e-m:e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
CM, OL),
- TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+ TLOF(TT.isOSBinFormatELF() ?
+ static_cast<TargetLoweringObjectFile*>(
+ new WebAssemblyTargetObjectFileELF()) :
+ static_cast<TargetLoweringObjectFile*>(
+ new WebAssemblyTargetObjectFile())) {
// WebAssembly type-checks instructions, but a noreturn function with a return
// type that doesn't match the context will cause a check failure. So we lower
// LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
// 'unreachable' instructions which is meant for that case.
this->Options.TrapUnreachable = true;
+ // WebAssembly treats each function as an independent unit. Force
+ // -ffunction-sections, effectively, so that we can emit them independently.
+ if (!TT.isOSBinFormatELF()) {
+ this->Options.FunctionSections = true;
+ this->Options.DataSections = true;
+ this->Options.UniqueSectionNames = true;
+ }
+
initAsmInfo();
// Note that we don't use setRequiresStructuredCFG(true). It disables
@@ -260,13 +272,19 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyRegColoring());
}
+ // Eliminate multiple-entry loops. Do this before inserting explicit get_local
+ // and set_local operators because we create a new variable that we want
+ // converted into a local.
+ addPass(createWebAssemblyFixIrreducibleControlFlow());
+
// Insert explicit get_local and set_local operators.
addPass(createWebAssemblyExplicitLocals());
- // Eliminate multiple-entry loops.
- addPass(createWebAssemblyFixIrreducibleControlFlow());
+ // Sort the blocks of the CFG into topological order, a prerequisite for
+ // BLOCK and LOOP markers.
+ addPass(createWebAssemblyCFGSort());
- // Put the CFG in structured form; insert BLOCK and LOOP markers.
+ // Insert BLOCK and LOOP markers.
addPass(createWebAssemblyCFGStackify());
// Lower br_unless into br_if.
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
index 74e33b93e00d..b1fd108bc249 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -17,8 +17,14 @@
#include "WebAssemblyTargetMachine.h"
using namespace llvm;
-void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
+void WebAssemblyTargetObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
+
+void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileWasm::Initialize(Ctx, TM);
+ InitializeWasm();
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index 39e50c9c575d..ace87c9e442f 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -20,7 +20,13 @@
namespace llvm {
-class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileELF {
+class WebAssemblyTargetObjectFileELF final
+ : public TargetLoweringObjectFileELF {
+public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+};
+
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileWasm {
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
};
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index a0049c147d2c..e32772d491cf 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -15,6 +15,7 @@
#include "WebAssemblyUtilities.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;
bool WebAssembly::isArgument(const MachineInstr &MI) {
@@ -69,3 +70,28 @@ bool WebAssembly::isChild(const MachineInstr &MI,
return TargetRegisterInfo::isVirtualRegister(Reg) &&
MFI.isVRegStackified(Reg);
}
+
+bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4f32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+MachineBasicBlock *llvm::LoopBottom(const MachineLoop *Loop) {
+ MachineBasicBlock *Bottom = Loop->getHeader();
+ for (MachineBasicBlock *MBB : Loop->blocks())
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ return Bottom;
+}
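
As a hedged illustration of how LoopBottom is typically consulted (the enclosing pass, and the MLI and MBB variables, are assumptions for the sake of the example):

  // Sketch only: decide whether a block falls inside a loop's layout range,
  // using LoopBottom so that discontiguous loops are handled as well.
  if (const MachineLoop *Loop = MLI.getLoopFor(&MBB)) {
    const MachineBasicBlock *Header = Loop->getHeader();
    const MachineBasicBlock *Bottom = LoopBottom(Loop);
    bool InLayoutRange = MBB.getNumber() >= Header->getNumber() &&
                         MBB.getNumber() <= Bottom->getNumber();
    (void)InLayoutRange; // a CFG sorter could use this to keep loop blocks together
  }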
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.h b/lib/Target/WebAssembly/WebAssemblyUtilities.h
index eb114403d14e..595491f1bf5b 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -18,7 +18,9 @@
namespace llvm {
+class MachineBasicBlock;
class MachineInstr;
+class MachineLoop;
class WebAssemblyFunctionInfo;
namespace WebAssembly {
@@ -27,8 +29,15 @@ bool isArgument(const MachineInstr &MI);
bool isCopy(const MachineInstr &MI);
bool isTee(const MachineInstr &MI);
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
+bool isCallIndirect(const MachineInstr &MI);
} // end namespace WebAssembly
+
+/// Return the "bottom" block of a loop. This differs from
+/// MachineLoop::getBottomBlock in that it works even if the loop is
+/// discontiguous.
+MachineBasicBlock *LoopBottom(const MachineLoop *Loop);
+
} // end namespace llvm
#endif
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index c38a7d1dd44d..788fac62626b 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -1,4 +1,4 @@
-//===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly C++ -*-===//
+//===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,24 +7,31 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86AsmInstrumentation.h"
-#include "MCTargetDesc/X86BaseInfo.h"
#include "X86Operand.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
#include <algorithm>
#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
#include <vector>
// Following comment describes how assembly instrumentation works.
@@ -91,30 +98,35 @@
// register as a frame register and temprorary override current CFA
// register.
-namespace llvm {
-namespace {
+using namespace llvm;
static cl::opt<bool> ClAsanInstrumentAssembly(
"asan-instrument-assembly",
cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden,
cl::init(false));
-const int64_t MinAllowedDisplacement = std::numeric_limits<int32_t>::min();
-const int64_t MaxAllowedDisplacement = std::numeric_limits<int32_t>::max();
+static const int64_t MinAllowedDisplacement =
+ std::numeric_limits<int32_t>::min();
+static const int64_t MaxAllowedDisplacement =
+ std::numeric_limits<int32_t>::max();
-int64_t ApplyDisplacementBounds(int64_t Displacement) {
+static int64_t ApplyDisplacementBounds(int64_t Displacement) {
return std::max(std::min(MaxAllowedDisplacement, Displacement),
MinAllowedDisplacement);
}
-void CheckDisplacementBounds(int64_t Displacement) {
+static void CheckDisplacementBounds(int64_t Displacement) {
assert(Displacement >= MinAllowedDisplacement &&
Displacement <= MaxAllowedDisplacement);
}
-bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
+static bool IsStackReg(unsigned Reg) {
+ return Reg == X86::RSP || Reg == X86::ESP;
+}
-bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
+static bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
+
+namespace {
class X86AddressSanitizer : public X86AsmInstrumentation {
public:
@@ -178,7 +190,7 @@ public:
X86AddressSanitizer(const MCSubtargetInfo *&STI)
: X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
- ~X86AddressSanitizer() override {}
+ ~X86AddressSanitizer() override = default;
// X86AsmInstrumentation implementation:
void InstrumentAndEmitInstruction(const MCInst &Inst,
@@ -255,9 +267,11 @@ protected:
bool is64BitMode() const {
return STI->getFeatureBits()[X86::Mode64Bit];
}
+
bool is32BitMode() const {
return STI->getFeatureBits()[X86::Mode32Bit];
}
+
bool is16BitMode() const {
return STI->getFeatureBits()[X86::Mode16Bit];
}
@@ -498,7 +512,7 @@ public:
X86AddressSanitizer32(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- ~X86AddressSanitizer32() override {}
+ ~X86AddressSanitizer32() override = default;
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
@@ -604,9 +618,9 @@ private:
EmitInstruction(
Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
(IsWrite ? "store" : "load") +
- llvm::Twine(AccessSize));
+ Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -756,7 +770,7 @@ public:
X86AddressSanitizer64(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- ~X86AddressSanitizer64() override {}
+ ~X86AddressSanitizer64() override = default;
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
@@ -875,15 +889,17 @@ private:
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
RegCtx.AddressReg(64)));
}
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
(IsWrite ? "store" : "load") +
- llvm::Twine(AccessSize));
+ Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
}
};
+} // end anonymous namespace
+
void X86AddressSanitizer64::InstrumentMemOperandSmall(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
@@ -1022,12 +1038,10 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
RestoreFlags(Out);
}
-} // End anonymous namespace
-
X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI)
- : STI(STI), InitialFrameReg(0) {}
+ : STI(STI) {}
-X86AsmInstrumentation::~X86AsmInstrumentation() {}
+X86AsmInstrumentation::~X86AsmInstrumentation() = default;
void X86AsmInstrumentation::InstrumentAndEmitInstruction(
const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
@@ -1060,8 +1074,9 @@ unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
}
X86AsmInstrumentation *
-CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo *&STI) {
+llvm::CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
+ const MCContext &Ctx,
+ const MCSubtargetInfo *&STI) {
Triple T(STI->getTargetTriple());
const bool hasCompilerRTSupport = T.isOSLinux();
if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
@@ -1073,5 +1088,3 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
}
return new X86AsmInstrumentation(STI);
}
-
-} // end llvm namespace
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 470ceadb0aa6..97a55cd8ad98 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -1,4 +1,4 @@
-//===- X86AsmInstrumentation.h - Instrument X86 inline assembly *- C++ -*-===//
+//===- X86AsmInstrumentation.h - Instrument X86 inline assembly -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,7 +11,6 @@
#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
#include "llvm/ADT/SmallVector.h"
-
#include <memory>
namespace llvm {
@@ -23,7 +22,6 @@ class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
class MCTargetOptions;
-
class X86AsmInstrumentation;
X86AsmInstrumentation *
@@ -43,7 +41,7 @@ public:
// Tries to instrument and emit instruction.
virtual void InstrumentAndEmitInstruction(
const MCInst &Inst,
- SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand> > &Operands,
+ SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
protected:
@@ -60,9 +58,9 @@ protected:
const MCSubtargetInfo *&STI;
- unsigned InitialFrameReg;
+ unsigned InitialFrameReg = 0;
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e692118f47fd..324da650e74e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -98,6 +98,14 @@ private:
IC_REGISTER
};
+ enum IntelOperatorKind {
+ IOK_INVALID = 0,
+ IOK_LENGTH,
+ IOK_SIZE,
+ IOK_TYPE,
+ IOK_OFFSET
+ };
+
class InfixCalculator {
typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
@@ -704,10 +712,12 @@ private:
std::unique_ptr<X86Operand> ParseIntelOperand();
std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
- std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
+ unsigned IdentifyIntelOperator(StringRef Name);
+ unsigned ParseIntelOperator(unsigned OpKind);
std::unique_ptr<X86Operand>
ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
+ bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
std::unique_ptr<X86Operand>
ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
@@ -814,6 +824,7 @@ private:
/// }
public:
+
X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
@@ -1266,10 +1277,12 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
}
}
// Remove all the ImmPrefix rewrites within the brackets.
+ // We may also have some Imm rewrites as a result of applying an operator;
+ // remove them as well.
for (AsmRewrite &AR : AsmRewrites) {
if (AR.Loc.getPointer() < StartInBrac.getPointer())
continue;
- if (AR.Kind == AOK_ImmPrefix)
+ if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm)
AR.Kind = AOK_Delete;
}
const char *SymLocPtr = SymName.data();
@@ -1286,6 +1299,30 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
}
}
+// Some binary bitwise operators have named synonyms. Query a candidate
+// string for being such a named operator and, if so, invoke the
+// appropriate handler.
+bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
+                                           IntelExprStateMachine &SM) {
+ // A named operator should be either lower or upper case, but not a mix
+ if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
+ return false;
+ if (Name.equals_lower("not"))
+ SM.onNot();
+ else if (Name.equals_lower("or"))
+ SM.onOr();
+ else if (Name.equals_lower("shl"))
+ SM.onLShift();
+ else if (Name.equals_lower("shr"))
+ SM.onRShift();
+ else if (Name.equals_lower("xor"))
+ SM.onXor();
+ else if (Name.equals_lower("and"))
+ SM.onAnd();
+ else
+ return false;
+ return true;
+}
+
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
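For reference, the named operators wired up in this hunk are plain synonyms for the bitwise operators; a hedged illustration of the values they yield, written as ordinary C++ only to show the equivalence:

  // Intel-syntax "5 SHL 2 OR 1" evaluates like the expression below, and the
  // synonyms are accepted in all-lowercase or all-uppercase spelling.
  int64_t A = (5 << 2) | 1;  // 21
  int64_t B = ~0LL & 0xFF;   // 255, matching "NOT 0 AND 0FFh"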
@@ -1324,31 +1361,36 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
const MCExpr *Val;
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
+ UpdateLocLex = false;
if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
SM.onRegister(TmpReg);
- UpdateLocLex = false;
- break;
+ } else if (ParseIntelNamedOperator(Identifier, SM)) {
+ UpdateLocLex = true;
+ } else if (!isParsingInlineAsm()) {
+ if (getParser().parsePrimaryExpr(Val, End))
+ return Error(Tok.getLoc(), "Unexpected identifier!");
+ SM.onIdentifierExpr(Val, Identifier);
+ } else if (unsigned OpKind = IdentifyIntelOperator(Identifier)) {
+ if (OpKind == IOK_OFFSET)
+ return Error(IdentLoc, "Dealing OFFSET operator as part of"
+ "a compound immediate expression is yet to be supported");
+ int64_t Val = ParseIntelOperator(OpKind);
+ if (!Val)
+ return true;
+ StringRef ErrMsg;
+ if (SM.onInteger(Val, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ } else if (Identifier.find('.') != StringRef::npos &&
+ PrevTK == AsmToken::RBrac) {
+ return false;
} else {
- if (!isParsingInlineAsm()) {
- if (getParser().parsePrimaryExpr(Val, End))
- return Error(Tok.getLoc(), "Unexpected identifier!");
- } else {
- // This is a dot operator, not an adjacent identifier.
- if (Identifier.find('.') != StringRef::npos &&
- PrevTK == AsmToken::RBrac) {
- return false;
- } else {
- InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
- if (ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated=*/false, End))
- return true;
- }
- }
+ InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return true;
SM.onIdentifierExpr(Val, Identifier);
- UpdateLocLex = false;
- break;
}
- return Error(Tok.getLoc(), "Unexpected identifier!");
+ break;
}
case AsmToken::Integer: {
StringRef ErrMsg;
@@ -1715,11 +1757,16 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
OffsetOfLoc, Identifier, Info.OpDecl);
}
-enum IntelOperatorKind {
- IOK_LENGTH,
- IOK_SIZE,
- IOK_TYPE
-};
+// Query a candidate string for being an Intel assembly operator.
+// Report back its kind, or IOK_INVALID if it is not recognized as one.
+unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) {
+ return StringSwitch<unsigned>(Name)
+ .Cases("TYPE","type",IOK_TYPE)
+ .Cases("SIZE","size",IOK_SIZE)
+ .Cases("LENGTH","length",IOK_LENGTH)
+ .Cases("OFFSET","offset",IOK_OFFSET)
+ .Default(IOK_INVALID);
+}
/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
/// returns the number of elements in an array. It returns the value 1 for
@@ -1727,7 +1774,7 @@ enum IntelOperatorKind {
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
-std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+unsigned X86AsmParser::ParseIntelOperator(unsigned OpKind) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc TypeLoc = Tok.getLoc();
@@ -1739,11 +1786,13 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/true, End))
- return nullptr;
-
- if (!Info.OpDecl)
- return ErrorOperand(Start, "unable to lookup expression");
+ return 0;
+ if (!Info.OpDecl) {
+ Error(Start, "unable to lookup expression");
+ return 0;
+ }
+
unsigned CVal = 0;
switch(OpKind) {
default: llvm_unreachable("Unexpected operand kind!");
@@ -1757,8 +1806,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
unsigned Len = End.getPointer() - TypeLoc.getPointer();
InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
- const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
- return X86Operand::CreateImm(Imm, Start, End);
+ return CVal;
}
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
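To make the LENGTH/SIZE/TYPE semantics documented above ParseIntelOperator concrete, a hedged worked example (the declaration is illustrative):

  // Given a C variable visible to MS-style inline asm such as:
  //   int arr[10];
  // the operators evaluate to:
  //   TYPE arr   -> 4    (size of one element)
  //   LENGTH arr -> 10   (number of elements)
  //   SIZE arr   -> 40   (LENGTH * TYPE)
  // With this change, ParseIntelOperator returns the value as a plain
  // unsigned (CVal) instead of building an immediate operand.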
@@ -1766,18 +1814,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
- // Offset, length, type and size operators.
- if (isParsingInlineAsm()) {
- StringRef AsmTokStr = Tok.getString();
- if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
+ // FIXME: Offset operator
+ // Should be handled as part of an immediate expression, like the other
+ // operators. Currently it is only supported as a stand-alone operand.
+ if (isParsingInlineAsm())
+ if (IdentifyIntelOperator(Tok.getString()) == IOK_OFFSET)
return ParseIntelOffsetOfOperator();
- if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
- return ParseIntelOperator(IOK_LENGTH);
- if (AsmTokStr == "size" || AsmTokStr == "SIZE")
- return ParseIntelOperator(IOK_SIZE);
- if (AsmTokStr == "type" || AsmTokStr == "TYPE")
- return ParseIntelOperator(IOK_TYPE);
- }
bool PtrInOperand = false;
unsigned Size = getIntelMemOperandSize(Tok.getString());
@@ -2360,7 +2402,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Name == "lock" || Name == "rep" ||
Name == "repe" || Name == "repz" ||
Name == "repne" || Name == "repnz" ||
- Name == "rex64" || Name == "data16";
+ Name == "rex64" || Name == "data16" || Name == "data32";
bool CurlyAsEndOfStatement = false;
// This does the actual operand parsing. Don't parse any more if we have a
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 9db1a8483bee..9f1fa6c65907 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -1,4 +1,4 @@
-//===-- X86Operand.h - Parsed X86 machine instruction --------------------===//
+//===- X86Operand.h - Parsed X86 machine instruction ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,17 @@
#define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
#include "X86AsmParserCommon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/STLExtras.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
+#include <cassert>
+#include <memory>
namespace llvm {
@@ -74,11 +79,14 @@ struct X86Operand : public MCParsedAsmOperand {
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const override { return StartLoc; }
+
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const override { return EndLoc; }
+
/// getLocRange - Get the range between the first and last token of this
/// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+
/// getOffsetOfLoc - Get the location of the offset operator.
SMLoc getOffsetOfLoc() const override { return OffsetOfLoc; }
@@ -271,6 +279,9 @@ struct X86Operand : public MCParsedAsmOperand {
bool isMem256_RC256X() const {
return isMem256() && isMemIndexReg(X86::YMM0, X86::YMM31);
}
+ bool isMem256_RC512() const {
+ return isMem256() && isMemIndexReg(X86::ZMM0, X86::ZMM31);
+ }
bool isMem512_RC256X() const {
return isMem512() && isMemIndexReg(X86::YMM0, X86::YMM31);
}
@@ -419,10 +430,12 @@ struct X86Operand : public MCParsedAsmOperand {
RegNo = getGR32FromGR64(RegNo);
Inst.addOperand(MCOperand::createReg(RegNo));
}
+
void addAVX512RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
@@ -451,6 +464,7 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
Inst.addOperand(MCOperand::createReg(getMemSegReg()));
}
+
void addDstIdxOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
@@ -541,6 +555,6 @@ struct X86Operand : public MCParsedAsmOperand {
}
};
-} // End of namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 9dfd09022bdc..fc4adddc149b 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -10,11 +10,20 @@ tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
+if(LLVM_BUILD_GLOBAL_ISEL)
+ tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
+ tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
+endif()
+
add_public_tablegen_target(X86CommonTableGen)
# Add GlobalISel files if the build option was enabled.
set(GLOBAL_ISEL_FILES
X86CallLowering.cpp
+ X86LegalizerInfo.cpp
+ X86RegisterBankInfo.cpp
+ X86InstructionSelector.cpp
)
if(LLVM_BUILD_GLOBAL_ISEL)
@@ -43,6 +52,7 @@ set(sources
X86EvexToVex.cpp
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
+ X86MacroFusion.cpp
X86OptimizeLEAs.cpp
X86PadShortFunction.cpp
X86RegisterInfo.cpp
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 0871888bbfcd..36ad23bb41c0 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -368,32 +368,49 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
bool isBranch = false;
uint64_t pcrel = 0;
- if (type == TYPE_RELv) {
+ if (type == TYPE_REL) {
isBranch = true;
pcrel = insn.startLocation +
insn.immediateOffset + insn.immediateSize;
- switch (insn.displacementSize) {
+ switch (operand.encoding) {
default:
break;
- case 1:
+ case ENCODING_Iv:
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 1:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case 2:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case 4:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case 8:
+ break;
+ }
+ break;
+ case ENCODING_IB:
if(immediate & 0x80)
immediate |= ~(0xffull);
break;
- case 2:
+ case ENCODING_IW:
if(immediate & 0x8000)
immediate |= ~(0xffffull);
break;
- case 4:
+ case ENCODING_ID:
if(immediate & 0x80000000)
immediate |= ~(0xffffffffull);
break;
- case 8:
- break;
}
}
// By default sign-extend all X86 immediates based on their encoding.
- else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
- type == TYPE_IMM64 || type == TYPE_IMMv) {
+ else if (type == TYPE_IMM) {
switch (operand.encoding) {
default:
break;
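The sign-extension idiom used throughout this hunk can be checked with a small, self-contained sketch (the input value is illustrative):

  #include <cstdint>

  // Mirrors the displacementSize == 1 case above: widen a 1-byte pc-relative
  // immediate to 64 bits, preserving its sign.
  static uint64_t signExtend8(uint64_t immediate) {
    if (immediate & 0x80)       // the 8-bit sign bit is set
      immediate |= ~(0xffull);  // fill the upper 56 bits with ones
    return immediate;
  }
  // signExtend8(0xFE) == 0xFFFFFFFFFFFFFFFEull, i.e. -2 as an int64_t.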
@@ -620,38 +637,17 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
switch (type) {
- case TYPE_XMM32:
- case TYPE_XMM64:
- case TYPE_XMM128:
+ case TYPE_XMM:
mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
return;
- case TYPE_XMM256:
+ case TYPE_YMM:
mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
return;
- case TYPE_XMM512:
+ case TYPE_ZMM:
mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
return;
case TYPE_BNDR:
mcInst.addOperand(MCOperand::createReg(X86::BND0 + (immediate >> 4)));
- case TYPE_REL8:
- isBranch = true;
- pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if (immediate & 0x80)
- immediate |= ~(0xffull);
- break;
- case TYPE_REL16:
- isBranch = true;
- pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if (immediate & 0x8000)
- immediate |= ~(0xffffull);
- break;
- case TYPE_REL32:
- case TYPE_REL64:
- isBranch = true;
- pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if(immediate & 0x80000000)
- immediate |= ~(0xffffffffull);
- break;
default:
// operand is 64 bits wide. Do nothing.
break;
@@ -662,8 +658,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
mcInst, Dis))
mcInst.addOperand(MCOperand::createImm(immediate));
- if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
- type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
+ if (type == TYPE_MOFFS) {
MCOperand segmentReg;
segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
mcInst.addOperand(segmentReg);
@@ -767,7 +762,27 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
Opcode == X86::VPGATHERDQYrm ||
Opcode == X86::VPGATHERQQrm ||
Opcode == X86::VPGATHERDDrm ||
- Opcode == X86::VPGATHERQDrm);
+ Opcode == X86::VPGATHERQDrm ||
+ Opcode == X86::VGATHERDPDZ128rm ||
+ Opcode == X86::VGATHERDPDZ256rm ||
+ Opcode == X86::VGATHERDPSZ128rm ||
+ Opcode == X86::VGATHERQPDZ128rm ||
+ Opcode == X86::VGATHERQPSZ128rm ||
+ Opcode == X86::VPGATHERDDZ128rm ||
+ Opcode == X86::VPGATHERDQZ128rm ||
+ Opcode == X86::VPGATHERDQZ256rm ||
+ Opcode == X86::VPGATHERQDZ128rm ||
+ Opcode == X86::VPGATHERQQZ128rm ||
+ Opcode == X86::VSCATTERDPDZ128mr ||
+ Opcode == X86::VSCATTERDPDZ256mr ||
+ Opcode == X86::VSCATTERDPSZ128mr ||
+ Opcode == X86::VSCATTERQPDZ128mr ||
+ Opcode == X86::VSCATTERQPSZ128mr ||
+ Opcode == X86::VPSCATTERDDZ128mr ||
+ Opcode == X86::VPSCATTERDQZ128mr ||
+ Opcode == X86::VPSCATTERDQZ256mr ||
+ Opcode == X86::VPSCATTERQDZ128mr ||
+ Opcode == X86::VPSCATTERQQZ128mr);
bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
Opcode == X86::VGATHERDPSYrm ||
Opcode == X86::VGATHERQPSYrm ||
@@ -775,13 +790,49 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
Opcode == X86::VPGATHERDQZrm ||
Opcode == X86::VPGATHERQQYrm ||
Opcode == X86::VPGATHERDDYrm ||
- Opcode == X86::VPGATHERQDYrm);
+ Opcode == X86::VPGATHERQDYrm ||
+ Opcode == X86::VGATHERDPSZ256rm ||
+ Opcode == X86::VGATHERQPDZ256rm ||
+ Opcode == X86::VGATHERQPSZ256rm ||
+ Opcode == X86::VPGATHERDDZ256rm ||
+ Opcode == X86::VPGATHERQQZ256rm ||
+ Opcode == X86::VPGATHERQDZ256rm ||
+ Opcode == X86::VSCATTERDPDZmr ||
+ Opcode == X86::VPSCATTERDQZmr ||
+ Opcode == X86::VSCATTERDPSZ256mr ||
+ Opcode == X86::VSCATTERQPDZ256mr ||
+ Opcode == X86::VSCATTERQPSZ256mr ||
+ Opcode == X86::VPSCATTERDDZ256mr ||
+ Opcode == X86::VPSCATTERQQZ256mr ||
+ Opcode == X86::VPSCATTERQDZ256mr ||
+ Opcode == X86::VGATHERPF0DPDm ||
+ Opcode == X86::VGATHERPF1DPDm ||
+ Opcode == X86::VSCATTERPF0DPDm ||
+ Opcode == X86::VSCATTERPF1DPDm);
bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm ||
Opcode == X86::VGATHERDPSZrm ||
Opcode == X86::VGATHERQPSZrm ||
Opcode == X86::VPGATHERQQZrm ||
Opcode == X86::VPGATHERDDZrm ||
- Opcode == X86::VPGATHERQDZrm);
+ Opcode == X86::VPGATHERQDZrm ||
+ Opcode == X86::VSCATTERQPDZmr ||
+ Opcode == X86::VSCATTERDPSZmr ||
+ Opcode == X86::VSCATTERQPSZmr ||
+ Opcode == X86::VPSCATTERQQZmr ||
+ Opcode == X86::VPSCATTERDDZmr ||
+ Opcode == X86::VPSCATTERQDZmr ||
+ Opcode == X86::VGATHERPF0DPSm ||
+ Opcode == X86::VGATHERPF0QPDm ||
+ Opcode == X86::VGATHERPF0QPSm ||
+ Opcode == X86::VGATHERPF1DPSm ||
+ Opcode == X86::VGATHERPF1QPDm ||
+ Opcode == X86::VGATHERPF1QPSm ||
+ Opcode == X86::VSCATTERPF0DPSm ||
+ Opcode == X86::VSCATTERPF0QPDm ||
+ Opcode == X86::VSCATTERPF0QPSm ||
+ Opcode == X86::VSCATTERPF1DPSm ||
+ Opcode == X86::VSCATTERPF1QPDm ||
+ Opcode == X86::VSCATTERPF1QPSm);
if (IndexIs128 || IndexIs256 || IndexIs512) {
unsigned IndexOffset = insn.sibIndex -
(insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
@@ -909,38 +960,15 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_R64:
case TYPE_Rv:
case TYPE_MM64:
- case TYPE_XMM32:
- case TYPE_XMM64:
- case TYPE_XMM128:
- case TYPE_XMM256:
- case TYPE_XMM512:
- case TYPE_VK1:
- case TYPE_VK2:
- case TYPE_VK4:
- case TYPE_VK8:
- case TYPE_VK16:
- case TYPE_VK32:
- case TYPE_VK64:
+ case TYPE_XMM:
+ case TYPE_YMM:
+ case TYPE_ZMM:
+ case TYPE_VK:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
case TYPE_BNDR:
return translateRMRegister(mcInst, insn);
case TYPE_M:
- case TYPE_M8:
- case TYPE_M16:
- case TYPE_M32:
- case TYPE_M64:
- case TYPE_M128:
- case TYPE_M256:
- case TYPE_M512:
- case TYPE_Mv:
- case TYPE_M32FP:
- case TYPE_M64FP:
- case TYPE_M80FP:
- case TYPE_M1616:
- case TYPE_M1632:
- case TYPE_M1664:
- case TYPE_LEA:
return translateRMMemory(mcInst, insn, Dis);
}
}
@@ -992,6 +1020,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_WRITEMASK:
return translateMaskRegister(mcInst, insn.writemask);
CASE_ENCODING_RM:
+ CASE_ENCODING_VSIB:
return translateRM(mcInst, operand, insn, Dis);
case ENCODING_IB:
case ENCODING_IW:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index ab64d6fcf70b..b7f637e9a8cd 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -650,11 +650,6 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->addressSize = (hasAdSize ? 4 : 8);
insn->displacementSize = 4;
insn->immediateSize = 4;
- } else if (insn->rexPrefix) {
- insn->registerSize = (hasOpSize ? 2 : 4);
- insn->addressSize = (hasAdSize ? 4 : 8);
- insn->displacementSize = (hasOpSize ? 2 : 4);
- insn->immediateSize = (hasOpSize ? 2 : 4);
} else {
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 4 : 8);
@@ -1475,21 +1470,13 @@ static int readModRM(struct InternalInstruction* insn) {
return prefix##_EAX + index; \
case TYPE_R64: \
return prefix##_RAX + index; \
- case TYPE_XMM512: \
+ case TYPE_ZMM: \
return prefix##_ZMM0 + index; \
- case TYPE_XMM256: \
+ case TYPE_YMM: \
return prefix##_YMM0 + index; \
- case TYPE_XMM128: \
- case TYPE_XMM64: \
- case TYPE_XMM32: \
+ case TYPE_XMM: \
return prefix##_XMM0 + index; \
- case TYPE_VK1: \
- case TYPE_VK2: \
- case TYPE_VK4: \
- case TYPE_VK8: \
- case TYPE_VK16: \
- case TYPE_VK32: \
- case TYPE_VK64: \
+ case TYPE_VK: \
if (index > 7) \
*valid = 0; \
return prefix##_K0 + index; \
@@ -1562,6 +1549,7 @@ static int fixupReg(struct InternalInstruction *insn,
return -1;
break;
CASE_ENCODING_RM:
+ CASE_ENCODING_VSIB:
if (insn->eaBase >= insn->eaRegBase) {
insn->eaBase = (EABase)fixupRMValue(insn,
(OperandType)op->type,
@@ -1753,6 +1741,18 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_SI:
case ENCODING_DI:
break;
+ CASE_ENCODING_VSIB:
+ // VSIB can use the V2 bit so check only the other bits.
+ if (needVVVV)
+ needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &Op))
+ return -1;
+ // Apply the AVX512 compressed displacement scaling factor.
+ if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
+ insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
+ break;
case ENCODING_REG:
CASE_ENCODING_RM:
if (readModRM(insn))
@@ -1774,8 +1774,7 @@ static int readOperands(struct InternalInstruction* insn) {
}
if (readImmediate(insn, 1))
return -1;
- if (Op.type == TYPE_XMM128 ||
- Op.type == TYPE_XMM256)
+ if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
sawRegImm = 1;
break;
case ENCODING_IW:
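As a hedged check of the compressed-displacement (disp8*N) scaling added to readOperands above, a small worked example (the chosen encoding and value are illustrative):

  // ENCODING_VSIB_CD8 sits three enum entries past ENCODING_VSIB (see the
  // ENCODING_VSIB_* entries added to X86DisassemblerDecoderCommon.h below),
  // so a raw 8-bit displacement of 0x10 is scaled by 1 << 3 = 8:
  int64_t displacement = 0x10;
  displacement *= 1 << (ENCODING_VSIB_CD8 - ENCODING_VSIB);  // 0x10 * 8 = 0x80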
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 0a835b876d90..e0f4399b3687 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -339,6 +339,15 @@ enum ModRMDecisionType {
case ENCODING_RM_CD32: \
case ENCODING_RM_CD64
+#define CASE_ENCODING_VSIB \
+ case ENCODING_VSIB: \
+ case ENCODING_VSIB_CD2: \
+ case ENCODING_VSIB_CD4: \
+ case ENCODING_VSIB_CD8: \
+ case ENCODING_VSIB_CD16: \
+ case ENCODING_VSIB_CD32: \
+ case ENCODING_VSIB_CD64
+
// Physical encodings of instruction operands.
#define ENCODINGS \
ENUM_ENTRY(ENCODING_NONE, "") \
@@ -350,6 +359,13 @@ enum ModRMDecisionType {
ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16") \
ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32") \
ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64") \
+ ENUM_ENTRY(ENCODING_VSIB, "VSIB operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VSIB_CD2, "VSIB operand with CDisp scaling of 2") \
+ ENUM_ENTRY(ENCODING_VSIB_CD4, "VSIB operand with CDisp scaling of 4") \
+ ENUM_ENTRY(ENCODING_VSIB_CD8, "VSIB operand with CDisp scaling of 8") \
+ ENUM_ENTRY(ENCODING_VSIB_CD16,"VSIB operand with CDisp scaling of 16") \
+ ENUM_ENTRY(ENCODING_VSIB_CD32,"VSIB operand with CDisp scaling of 32") \
+ ENUM_ENTRY(ENCODING_VSIB_CD64,"VSIB operand with CDisp scaling of 64") \
ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
ENUM_ENTRY(ENCODING_WRITEMASK, "Register operand in EVEX.aaa byte.") \
ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
@@ -383,85 +399,38 @@ enum OperandEncoding {
// Semantic interpretations of instruction operands.
#define TYPES \
ENUM_ENTRY(TYPE_NONE, "") \
- ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
- ENUM_ENTRY(TYPE_REL16, "2-byte") \
- ENUM_ENTRY(TYPE_REL32, "4-byte") \
- ENUM_ENTRY(TYPE_REL64, "8-byte") \
- ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
- ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
- ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_REL, "immediate address") \
ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
ENUM_ENTRY(TYPE_R16, "2-byte") \
ENUM_ENTRY(TYPE_R32, "4-byte") \
ENUM_ENTRY(TYPE_R64, "8-byte") \
- ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
- ENUM_ENTRY(TYPE_IMM16, "2-byte") \
- ENUM_ENTRY(TYPE_IMM32, "4-byte") \
- ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM, "immediate operand") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \
ENUM_ENTRY(TYPE_UIMM8, "1-byte unsigned immediate operand") \
- ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
- ENUM_ENTRY(TYPE_RM16, "2-byte") \
- ENUM_ENTRY(TYPE_RM32, "4-byte") \
- ENUM_ENTRY(TYPE_RM64, "8-byte") \
ENUM_ENTRY(TYPE_M, "Memory operand") \
- ENUM_ENTRY(TYPE_M8, "1-byte") \
- ENUM_ENTRY(TYPE_M16, "2-byte") \
- ENUM_ENTRY(TYPE_M32, "4-byte") \
- ENUM_ENTRY(TYPE_M64, "8-byte") \
- ENUM_ENTRY(TYPE_LEA, "Effective address") \
- ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
- ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \
- ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
- ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
- ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
- ENUM_ENTRY(TYPE_SRCIDX8, "1-byte memory at source index") \
- ENUM_ENTRY(TYPE_SRCIDX16, "2-byte memory at source index") \
- ENUM_ENTRY(TYPE_SRCIDX32, "4-byte memory at source index") \
- ENUM_ENTRY(TYPE_SRCIDX64, "8-byte memory at source index") \
- ENUM_ENTRY(TYPE_DSTIDX8, "1-byte memory at destination index") \
- ENUM_ENTRY(TYPE_DSTIDX16, "2-byte memory at destination index") \
- ENUM_ENTRY(TYPE_DSTIDX32, "4-byte memory at destination index") \
- ENUM_ENTRY(TYPE_DSTIDX64, "8-byte memory at destination index") \
- ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
- "base)") \
- ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
- ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
- ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
- ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
- ENUM_ENTRY(TYPE_M64FP, "64-bit") \
- ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_SRCIDX, "memory at source index") \
+ ENUM_ENTRY(TYPE_DSTIDX, "memory at destination index") \
+ ENUM_ENTRY(TYPE_MOFFS, "memory offset (relative to segment base)") \
ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
ENUM_ENTRY(TYPE_MM64, "8-byte MMX register") \
- ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
- ENUM_ENTRY(TYPE_XMM64, "8-byte") \
- ENUM_ENTRY(TYPE_XMM128, "16-byte") \
- ENUM_ENTRY(TYPE_XMM256, "32-byte") \
- ENUM_ENTRY(TYPE_XMM512, "64-byte") \
- ENUM_ENTRY(TYPE_VK1, "1-bit") \
- ENUM_ENTRY(TYPE_VK2, "2-bit") \
- ENUM_ENTRY(TYPE_VK4, "4-bit") \
- ENUM_ENTRY(TYPE_VK8, "8-bit") \
- ENUM_ENTRY(TYPE_VK16, "16-bit") \
- ENUM_ENTRY(TYPE_VK32, "32-bit") \
- ENUM_ENTRY(TYPE_VK64, "64-bit") \
+ ENUM_ENTRY(TYPE_XMM, "16-byte") \
+ ENUM_ENTRY(TYPE_YMM, "32-byte") \
+ ENUM_ENTRY(TYPE_ZMM, "64-byte") \
+ ENUM_ENTRY(TYPE_VK, "mask register") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \
\
- ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
- ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
ENUM_ENTRY(TYPE_DUP1, "operand 1") \
ENUM_ENTRY(TYPE_DUP2, "operand 2") \
ENUM_ENTRY(TYPE_DUP3, "operand 3") \
ENUM_ENTRY(TYPE_DUP4, "operand 4") \
- ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
#define ENUM_ENTRY(n, d) n,
enum OperandType {
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 10b7e6ff5ee2..6aa700306744 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -12,19 +12,22 @@
//
//===----------------------------------------------------------------------===//
-#include "X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -61,6 +64,17 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
OS << "\tcallq\t";
printPCRelImm(MI, 0, OS);
}
+ // data16 and data32 both have the same encoding of 0x66. While data32 is
+ // valid only in 16-bit mode, data16 is valid in the other modes.
+ // Incomplete support for the Requires clause seems to cause 0x66 to be
+ // interpreted as "data16" by the asm printer.
+ // Thus we adjust the opcode here in order to print the "right" instruction.
+ else if (MI->getOpcode() == X86::DATA16_PREFIX &&
+ (STI.getFeatureBits()[X86::Mode16Bit])) {
+ MCInst Data32MI(*MI);
+ Data32MI.setOpcode(X86::DATA32_PREFIX);
+ printInstruction(&Data32MI, OS);
+ }
// Try to print any aliases first.
else if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
@@ -135,6 +149,7 @@ void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
case 3: O << "{rz-sae}"; break;
}
}
+
/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value (e.g. for jumps and calls). These
/// print slightly differently than normal immediates. For example, a $ is not
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index bbb309076610..946c1c73f088 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -1,4 +1,4 @@
-//==- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
+//=- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -137,6 +137,7 @@ public:
private:
bool HasCustomInstComment;
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 8594addb5dd4..6e062ec59347 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -1189,8 +1189,6 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
OS << ']';
--i; // For loop increments element #.
}
- //MI->print(OS, 0);
- OS << "\n";
// We successfully added a comment to this instruction.
return true;
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 4443edb8e342..a8c631ae282f 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -12,16 +12,18 @@
//
//===----------------------------------------------------------------------===//
-#include "X86IntelInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstComments.h"
+#include "X86IntelInstPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include <cctype>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 20cd7ffb2e63..ace31186a054 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -157,6 +157,6 @@ public:
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index e83ec9f4045a..a713af6aadb5 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -109,7 +109,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
assert(Fixup.getOffset() + Size <= DataSize &&
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index aab552547fac..d8953da4abb2 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -212,7 +212,12 @@ namespace X86II {
/// the offset from beginning of section.
///
/// This is the TLS offset for the COFF/Windows TLS mechanism.
- MO_SECREL
+ MO_SECREL,
+
+ /// MO_ABS8 - On a symbol operand this indicates that the symbol is known
+ /// to be an absolute symbol in range [0,128), so we can use the @ABS8
+ /// symbol modifier.
+ MO_ABS8,
};
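The [0,128) restriction matters because those values pass through the sign-extending 8-bit immediate forms unchanged, which is what lets @ABS8 pick the shorter encoding. A minimal sketch of that range check (illustrative only, not part of this change):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: true if a value survives truncation to 8 bits followed
    // by sign extension, i.e. it fits a sign-extended imm8. Every value in
    // [0, 128) does.
    static bool fitsSignExtendedImm8(int64_t V) {
      return V == static_cast<int8_t>(V);
    }

    int main() {
      assert(fitsSignExtendedImm8(0));
      assert(fitsSignExtendedImm8(127));
      assert(!fitsSignExtendedImm8(128));
      return 0;
    }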
enum : uint64_t {
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index da69da51df10..0b73df3a2ff8 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -13,24 +13,28 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
- class X86ELFObjectWriter : public MCELFObjectTargetWriter {
- public:
- X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
- ~X86ELFObjectWriter() override;
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
+ ~X86ELFObjectWriter() override = default;
- protected:
- unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const override;
- };
-}
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+
+} // end anonymous namespace
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
@@ -40,9 +44,6 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
(EMachine != ELF::EM_386) &&
(EMachine != ELF::EM_IAMCU)) {}
-X86ELFObjectWriter::~X86ELFObjectWriter()
-{}
-
enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
static X86_64RelType getType64(unsigned Kind,
@@ -96,6 +97,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
default:
llvm_unreachable("Unimplemented");
case MCSymbolRefExpr::VK_None:
+ case MCSymbolRefExpr::VK_X86_ABS8:
switch (Type) {
case RT64_64:
return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
@@ -219,6 +221,7 @@ static unsigned getRelocType32(MCContext &Ctx,
default:
llvm_unreachable("Unimplemented");
case MCSymbolRefExpr::VK_None:
+ case MCSymbolRefExpr::VK_X86_ABS8:
switch (Type) {
case RT32_32:
return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 8045e7c6d872..10e2bbc64d3c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -11,35 +11,43 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
namespace {
+
class X86MCCodeEmitter : public MCCodeEmitter {
- X86MCCodeEmitter(const X86MCCodeEmitter &) = delete;
- void operator=(const X86MCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
+
public:
X86MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), Ctx(ctx) {
}
-
- ~X86MCCodeEmitter() override {}
+ X86MCCodeEmitter(const X86MCCodeEmitter &) = delete;
+ X86MCCodeEmitter &operator=(const X86MCCodeEmitter &) = delete;
+ ~X86MCCodeEmitter() override = default;
bool is64BitMode(const MCSubtargetInfo &STI) const {
return STI.getFeatureBits()[X86::Mode64Bit];
@@ -106,8 +114,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
int ImmOffset = 0) const;
- inline static uint8_t ModRMByte(unsigned Mod, unsigned RegOpcode,
- unsigned RM) {
+ static uint8_t ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
return RM | (RegOpcode << 3) | (Mod << 6);
}
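For reference, ModRMByte packs the standard x86 ModRM fields: mod in bits 7:6, reg/opcode in bits 5:3, r/m in bits 2:0. A self-contained sketch mirroring the helper above (illustrative; the sample operands are my own):

    #include <cassert>
    #include <cstdint>

    // Same packing as the helper above: [7:6] mod, [5:3] reg/opcode, [2:0] r/m.
    static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
      assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM fields out of range");
      return static_cast<uint8_t>(RM | (RegOpcode << 3) | (Mod << 6));
    }

    int main() {
      // Register-direct form (mod = 0b11), reg/opcode = 0, r/m = 1 (ECX) -> 0xC1.
      assert(modRMByte(3, 0, 1) == 0xC1);
      return 0;
    }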
@@ -149,12 +156,6 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new X86MCCodeEmitter(MCII, Ctx);
-}
-
/// isDisp8 - Return true if this signed displacement fits in a 8-bit
/// sign-extended field.
static bool isDisp8(int Value) {
@@ -1436,7 +1437,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r: {
+ case X86II::MRM6r: case X86II::MRM7r:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
if (HasEVEX_K) // Skip writemask
@@ -1446,13 +1447,12 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
(Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r,
CurByte, OS);
break;
- }
case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
+ case X86II::MRM6m: case X86II::MRM7m:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
if (HasEVEX_K) // Skip writemask
@@ -1463,7 +1463,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
Rex, CurByte, OS, Fixups, STI);
CurOp += X86::AddrNumOperands;
break;
- }
+
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5:
case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8:
@@ -1527,3 +1527,9 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
#endif
}
+
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 33376b6d1b90..d6777fc8aa6a 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/COFF.h"
@@ -17,28 +18,24 @@
using namespace llvm;
-namespace llvm {
- class MCObjectWriter;
-}
-
namespace {
- class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
- public:
- X86WinCOFFObjectWriter(bool Is64Bit);
- ~X86WinCOFFObjectWriter() override;
- unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection,
- const MCAsmBackend &MAB) const override;
- };
-}
+class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ X86WinCOFFObjectWriter(bool Is64Bit);
+ ~X86WinCOFFObjectWriter() override = default;
+
+ unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
+};
+
+} // end anonymous namespace
X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
: COFF::IMAGE_FILE_MACHINE_I386) {}
-X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
-
unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 2cb80a482d06..fdcc7e1ab7b0 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -21,7 +21,10 @@ namespace llvm {
class FunctionPass;
class ImmutablePass;
+class InstructionSelector;
class PassRegistry;
+class X86RegisterBankInfo;
+class X86Subtarget;
class X86TargetMachine;
/// This pass converts a legalized DAG into a X86-specific DAG, ready for
@@ -92,6 +95,9 @@ void initializeFixupBWInstPassPass(PassRegistry &);
/// encoding when possible in order to reduce code size.
FunctionPass *createX86EvexToVexInsts();
+InstructionSelector *createX86InstructionSelector(X86Subtarget &,
+ X86RegisterBankInfo &);
+
void initializeEvexToVexInstPassPass(PassRegistry &);
} // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 83a23d4ad680..8fcc8e31d5d4 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -187,8 +187,6 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
-def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
- "Support HLE">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
@@ -202,6 +200,8 @@ def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
+def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
+ "Enable Cache Line Zero">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
@@ -215,18 +215,10 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
-def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true",
- "Invalidate Process-Context Identifier">;
-def FeatureVMFUNC : SubtargetFeature<"vmfunc", "HasVMFUNC", "true",
- "VM Functions">;
-def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
- "Supervisor Mode Access Protection">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
-def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true",
- "Enable Persistent Commit">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
// TODO: This feature ought to be renamed.
@@ -246,11 +238,12 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
-// On at least some AMD processors, there is no performance hazard to writing
-// only the lower parts of a YMM register without clearing the upper part.
-def FeatureFastPartialYMMWrite
- : SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
- "true", "Partial writes to YMM registers are fast">;
+// On some X86 processors, there is no performance hazard to writing only the
+// lower parts of a YMM or ZMM register without clearing the upper part.
+def FeatureFastPartialYMMorZMMWrite
+ : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
+ "HasFastPartialYMMorZMMWrite",
+ "true", "Partial writes to YMM/ZMM registers are fast">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
@@ -271,6 +264,15 @@ def FeatureFastLZCNT
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
+
+// Sandy Bridge and newer processors can use SHLD with the same source on both
+// inputs to implement rotate to avoid the partial flag update of the normal
+// rotate instructions.
+def FeatureFastSHLDRotate
+ : SubtargetFeature<
+ "fast-shld-rotate", "HasFastSHLDRotate", "true",
+ "SHLD can be used as a faster rotate">;
+
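To see why SHLD with the same register on both inputs is a rotate, here is a small C++ model of the instruction's semantics (an illustration under my reading of SHLD, not code from this patch):

    #include <cassert>
    #include <cstdint>

    // SHLD dst, src, n shifts dst left by n and fills the vacated low bits from
    // the high bits of src (model valid for 1 <= n <= 31).
    static uint32_t shld32(uint32_t Dst, uint32_t Src, unsigned N) {
      return (Dst << N) | (Src >> (32 - N));
    }

    static uint32_t rotl32(uint32_t X, unsigned N) {
      return (X << N) | (X >> (32 - N));
    }

    int main() {
      uint32_t X = 0xDEADBEEFu;
      for (unsigned N = 1; N < 32; ++N)
        assert(shld32(X, X, N) == rotl32(X, N)); // same source on both inputs == rotate
      return 0;
    }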
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
@@ -466,7 +468,8 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureLAHFSAHF,
- FeatureFastScalarFSQRT
+ FeatureFastScalarFSQRT,
+ FeatureFastSHLDRotate
]>;
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
@@ -498,10 +501,6 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
FeatureFMA,
FeatureLZCNT,
FeatureMOVBE,
- FeatureINVPCID,
- FeatureVMFUNC,
- FeatureRTM,
- FeatureHLE,
FeatureSlowIncDec
]>;
@@ -512,8 +511,7 @@ def : HaswellProc<"core-avx2">; // Legacy alias.
def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
FeatureADX,
- FeatureRDSEED,
- FeatureSMAP
+ FeatureRDSEED
]>;
class BroadwellProc<string Name> : ProcModel<Name, HaswellModel,
BDWFeatures.Value, []>;
@@ -521,6 +519,7 @@ def : BroadwellProc<"broadwell">;
def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
FeatureMPX,
+ FeatureRTM,
FeatureXSAVEC,
FeatureXSAVES,
FeatureSGX,
@@ -547,7 +546,8 @@ class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
- FeatureFMA
+ FeatureFMA,
+ FeatureFastPartialYMMorZMMWrite
]>;
def : KnightsLandingProc<"knl">;
@@ -558,7 +558,6 @@ def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
FeatureBWI,
FeatureVLX,
FeaturePKU,
- FeaturePCOMMIT,
FeatureCLWB
]>;
@@ -662,7 +661,7 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureLAHFSAHF,
- FeatureFastPartialYMMWrite
+ FeatureFastPartialYMMorZMMWrite
]>;
// Bulldozer
@@ -771,6 +770,7 @@ def: ProcessorModel<"znver1", BtVer2Model, [
FeatureBMI,
FeatureBMI2,
FeatureCLFLUSHOPT,
+ FeatureCLZERO,
FeatureCMPXCHG16B,
FeatureF16C,
FeatureFMA,
@@ -788,7 +788,6 @@ def: ProcessorModel<"znver1", BtVer2Model, [
FeatureRDRAND,
FeatureRDSEED,
FeatureSHA,
- FeatureSMAP,
FeatureSSE4A,
FeatureSlowSHLD,
FeatureX87,
@@ -824,6 +823,7 @@ def : ProcessorModel<"x86-64", SandyBridgeModel,
//===----------------------------------------------------------------------===//
include "X86RegisterInfo.td"
+include "X86RegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 6798253d0f6a..44bc373b0394 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -81,7 +81,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
- void LowerFAULTING_LOAD_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerFAULTING_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerPATCHABLE_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI);
@@ -92,6 +92,8 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+
// Helper function that emits the XRay sleds we've collected for a particular
// function.
void EmitXRayTable();
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 78bd2add8c3b..765af67de160 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -17,22 +17,35 @@
//
//===----------------------------------------------------------------------===//
-#include <algorithm>
-
-#include "X86.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -44,6 +57,7 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
namespace {
+
class X86CallFrameOptimization : public MachineFunctionPass {
public:
X86CallFrameOptimization() : MachineFunctionPass(ID) {}
@@ -53,30 +67,28 @@ public:
private:
// Information we know about a particular call site
struct CallContext {
- CallContext()
- : FrameSetup(nullptr), Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
- MovVector(4, nullptr), NoStackParams(false), UsePush(false) {}
+ CallContext() : FrameSetup(nullptr), MovVector(4, nullptr) {}
// Iterator referring to the frame setup instruction
MachineBasicBlock::iterator FrameSetup;
// Actual call instruction
- MachineInstr *Call;
+ MachineInstr *Call = nullptr;
// A copy of the stack pointer
- MachineInstr *SPCopy;
+ MachineInstr *SPCopy = nullptr;
// The total displacement of all passed parameters
- int64_t ExpectedDist;
+ int64_t ExpectedDist = 0;
// The sequence of movs used to pass the parameters
SmallVector<MachineInstr *, 4> MovVector;
// True if this call site has no stack parameters
- bool NoStackParams;
+ bool NoStackParams = false;
// True if this call site can use push instructions
- bool UsePush;
+ bool UsePush = false;
};
typedef SmallVector<CallContext, 8> ContextVector;
@@ -102,7 +114,7 @@ private:
StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
- const TargetInstrInfo *TII;
+ const X86InstrInfo *TII;
const X86FrameLowering *TFL;
const X86Subtarget *STI;
MachineRegisterInfo *MRI;
@@ -112,11 +124,8 @@ private:
};
char X86CallFrameOptimization::ID = 0;
-} // end anonymous namespace
-FunctionPass *llvm::createX86CallFrameOptimization() {
- return new X86CallFrameOptimization();
-}
+} // end anonymous namespace
// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
@@ -322,7 +331,6 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// transformation.
const X86RegisterInfo &RegInfo =
*static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
- unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
// We expect to enter this at the beginning of a call sequence
assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
@@ -331,8 +339,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// How much do we adjust the stack? This puts an upper bound on
// the number of parameters actually passed on it.
- unsigned int MaxAdjust =
- FrameSetup->getOperand(0).getImm() >> Log2SlotSize;
+ unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;
// A zero adjustment means no stack parameters
if (!MaxAdjust) {
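A worked example of that upper bound (hypothetical numbers, not taken from the patch): with 8-byte stack slots Log2SlotSize is 3, so a 32-byte frame-setup size allows at most four stack-passed parameters.

    #include <cassert>

    int main() {
      unsigned FrameSize = 32;    // hypothetical value returned by getFrameSize()
      unsigned Log2SlotSize = 3;  // 64-bit target: 8-byte slots
      unsigned MaxAdjust = FrameSize >> Log2SlotSize;
      assert(MaxAdjust == 4);     // at most 4 parameters passed on the stack
      return 0;
    }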
@@ -425,7 +432,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
return;
Context.Call = &*I;
- if ((++I)->getOpcode() != FrameDestroyOpcode)
+ if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
return;
// Now, go through the vector, and see that we don't have any gaps,
@@ -455,7 +462,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
// PEI will end up finalizing the handling of this.
MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
MachineBasicBlock &MBB = *(FrameSetup->getParent());
- FrameSetup->getOperand(1).setImm(Context.ExpectedDist);
+ TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);
DebugLoc DL = FrameSetup->getDebugLoc();
bool Is64Bit = STI->is64Bit();
@@ -482,11 +489,10 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
if (isInt<8>(Val))
PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
}
- Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
- .addOperand(PushOp);
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
break;
case X86::MOV32mr:
- case X86::MOV64mr:
+ case X86::MOV64mr: {
unsigned int Reg = PushOp.getReg();
// If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
@@ -496,9 +502,9 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
- .addReg(UndefReg)
- .addOperand(PushOp)
- .addImm(X86::sub_32bit);
+ .addReg(UndefReg)
+ .add(PushOp)
+ .addImm(X86::sub_32bit);
}
// If PUSHrmm is not slow on this target, try to fold the source of the
@@ -525,6 +531,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
}
break;
}
+ }
// For debugging, when using SP-based CFA, we need to adjust the CFA
// offset after each push.
@@ -584,3 +591,7 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
return &DefMI;
}
+
+FunctionPass *llvm::createX86CallFrameOptimization() {
+ return new X86CallFrameOptimization();
+}
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index 5ae4962378d3..137ef166aaeb 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -14,12 +14,20 @@
//===----------------------------------------------------------------------===//
#include "X86CallLowering.h"
+#include "X86CallingConv.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
+#include "X86TargetMachine.h"
+
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+#include "X86GenCallingConv.inc"
+
#ifndef LLVM_BUILD_GLOBAL_ISEL
#error "This shouldn't be built without GISel"
#endif
@@ -27,20 +35,183 @@ using namespace llvm;
X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
: CallLowering(&TLI) {}
+void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ MachineRegisterInfo &MRI,
+ SplitArgTy PerformArgSplit) const {
+
+ const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
+ LLVMContext &Context = OrigArg.Ty->getContext();
+ EVT VT = TLI.getValueType(DL, OrigArg.Ty);
+ unsigned NumParts = TLI.getNumRegisters(Context, VT);
+
+ if (NumParts == 1) {
+ // Replace the original type (pointer -> GPR).
+ SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context),
+ OrigArg.Flags, OrigArg.IsFixed);
+ return;
+ }
+
+ SmallVector<uint64_t, 4> BitOffsets;
+ SmallVector<unsigned, 8> SplitRegs;
+
+ EVT PartVT = TLI.getRegisterType(Context, VT);
+ Type *PartTy = PartVT.getTypeForEVT(Context);
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ ArgInfo Info =
+ ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)),
+ PartTy, OrigArg.Flags};
+ SplitArgs.push_back(Info);
+ PerformArgSplit(Info.Reg, PartVT.getSizeInBits() * i);
+ }
+}
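A rough model of what the split produces (a sketch assuming a 32-bit target splitting an i64 argument; this mirrors the PerformArgSplit(Info.Reg, PartVT.getSizeInBits() * i) callback above but is not the GlobalISel API):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Each part carries a register-sized chunk of the original value plus the
    // bit offset at which it was extracted.
    struct Part {
      unsigned BitOffset;
      uint32_t Bits;
    };

    static std::vector<Part> splitI64Into32BitParts(uint64_t V) {
      return {{0, static_cast<uint32_t>(V)}, {32, static_cast<uint32_t>(V >> 32)}};
    }

    int main() {
      auto Parts = splitI64Into32BitParts(0x1122334455667788ULL);
      assert(Parts.size() == 2);
      assert(Parts[0].BitOffset == 0 && Parts[0].Bits == 0x55667788u);
      assert(Parts[1].BitOffset == 32 && Parts[1].Bits == 0x11223344u);
      return 0;
    }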
+
+namespace {
+struct FuncReturnHandler : public CallLowering::ValueHandler {
+ FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ llvm_unreachable("Don't know how to get a stack address yet");
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ MIB.addUse(PhysReg, RegState::Implicit);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ llvm_unreachable("Don't know how to assign a value to an address yet");
+ }
+
+ MachineInstrBuilder &MIB;
+};
+} // End anonymous namespace.
+
bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
- // TODO: handle functions returning non-void values.
- if (Val)
- return false;
- MIRBuilder.buildInstr(X86::RET).addImm(0);
+ assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
+
+ auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
+
+ if (VReg) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto &DL = MF.getDataLayout();
+ const Function &F = *MF.getFunction();
+
+ ArgInfo OrigArg{VReg, Val->getType()};
+ setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+
+ SmallVector<ArgInfo, 8> SplitArgs;
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, VReg, Offset);
+ });
+ FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ return false;
+ }
+
+ MIRBuilder.insertInstr(MIB);
return true;
}
+namespace {
+struct FormalArgHandler : public CallLowering::ValueHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn, const DataLayout &DL)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), DL(DL) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+
+ unsigned AddrReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(0, DL.getPointerSizeInBits(0)));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+ return AddrReg;
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
+ 0);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ }
+
+ const DataLayout &DL;
+};
+} // namespace
+
bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
- // TODO: handle functions with one or more arguments.
- return F.arg_empty();
+ if (F.arg_empty())
+ return true;
+
+ // TODO: handle variadic functions
+ if (F.isVarArg())
+ return false;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto DL = MF.getDataLayout();
+
+ SmallVector<ArgInfo, 8> SplitArgs;
+ unsigned Idx = 0;
+ for (auto &Arg : F.args()) {
+ ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+ setArgFlags(OrigArg, Idx + 1, DL, F);
+ LLT Ty = MRI.getType(VRegs[Idx]);
+ unsigned Dst = VRegs[Idx];
+ bool Split = false;
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](unsigned Reg, uint64_t Offset) {
+ if (!Split) {
+ Split = true;
+ Dst = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Dst);
+ }
+ unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
+ Dst = Tmp;
+ });
+ if (Dst != VRegs[Idx])
+ MIRBuilder.buildCopy(VRegs[Idx], Dst);
+ Idx++;
+ }
+
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ if (!MBB.empty())
+ MIRBuilder.setInstr(*MBB.begin());
+
+ FormalArgHandler Handler(MIRBuilder, MRI, CC_X86, DL);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ return false;
+
+ // Move back to the end of the basic block.
+ MIRBuilder.setMBB(MBB);
+
+ return true;
}
diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h
index f2672f09d855..204e6974c702 100644
--- a/lib/Target/X86/X86CallLowering.h
+++ b/lib/Target/X86/X86CallLowering.h
@@ -34,6 +34,14 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+private:
+ /// A function of this type is used to perform the value-splitting action.
+ typedef std::function<void(unsigned, uint64_t)> SplitArgTy;
+
+ void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ SplitArgTy SplitArg) const;
};
} // End of namespace llvm;
#endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index cf7bc981b8a5..6781d761a1c4 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -1074,6 +1074,8 @@ def CSR_32_AllRegs_AVX512 : CalleeSavedRegs<(add CSR_32_AllRegs,
(sequence "K%u", 0, 7))>;
def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX)>;
+def CSR_64_AllRegs_NoSSE : CalleeSavedRegs<(add RAX, RBX, RCX, RDX, RSI, RDI, R8, R9,
+ R10, R11, R12, R13, R14, R15, RBP)>;
def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX,
(sequence "YMM%u", 0, 15)),
(sequence "XMM%u", 0, 15))>;
diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp
index bdd1ab537bb2..6472bbbc9016 100755
--- a/lib/Target/X86/X86EvexToVex.cpp
+++ b/lib/Target/X86/X86EvexToVex.cpp
@@ -20,16 +20,30 @@
//===---------------------------------------------------------------------===//
#include "InstPrinter/X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
-#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
-#include "X86InstrTablesInfo.h"
-#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
+// Including the generated EVEX2VEX tables.
+struct X86EvexToVexCompressTableEntry {
+ uint16_t EvexOpcode;
+ uint16_t VexOpcode;
+};
+#include "X86GenEVEX2VEXTables.inc"
+
#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
#define EVEX2VEX_NAME "x86-evex-to-vex-compress"
@@ -56,8 +70,6 @@ class EvexToVexInstPass : public MachineFunctionPass {
public:
static char ID;
- StringRef getPassName() const override { return EVEX2VEX_DESC; }
-
EvexToVexInstPass() : MachineFunctionPass(ID) {
initializeEvexToVexInstPassPass(*PassRegistry::getPassRegistry());
@@ -72,6 +84,8 @@ public:
}
}
+ StringRef getPassName() const override { return EVEX2VEX_DESC; }
+
/// Loop over all of the basic blocks, replacing EVEX instructions
/// by equivalent VEX instructions when possible for reducing code size.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -88,13 +102,8 @@ private:
};
char EvexToVexInstPass::ID = 0;
-}
-INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
-
-FunctionPass *llvm::createX86EvexToVexInsts() {
- return new EvexToVexInstPass();
-}
+} // end anonymous namespace
bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
@@ -125,7 +134,6 @@ void EvexToVexInstPass::AddTableEntry(EvexToVexTableType &EvexToVexTable,
// For EVEX instructions that can be encoded using VEX encoding
// replace them by the VEX encoding in order to reduce size.
bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
-
// VEX format.
// # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1
// [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM]
@@ -211,3 +219,9 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
MI.setAsmPrinterFlag(AC_EVEX_2_VEX);
return true;
}
+
+INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
+
+FunctionPass *llvm::createX86EvexToVexInsts() {
+ return new EvexToVexInstPass();
+}
diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp
index 985acf92a2d4..5dfd95f71301 100644
--- a/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/lib/Target/X86/X86ExpandPseudo.cpp
@@ -77,9 +77,11 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
default:
return false;
case X86::TCRETURNdi:
+ case X86::TCRETURNdicc:
case X86::TCRETURNri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
+ case X86::TCRETURNdi64cc:
case X86::TCRETURNri64:
case X86::TCRETURNmi64: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
@@ -97,6 +99,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Offset = StackAdj - MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
+ if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
+ assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
+ }
+
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
@@ -105,12 +111,22 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Jump to label or value in register.
bool IsWin64 = STI->isTargetWin64();
- if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
+ if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
+ Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
unsigned Op;
switch (Opcode) {
case X86::TCRETURNdi:
Op = X86::TAILJMPd;
break;
+ case X86::TCRETURNdicc:
+ Op = X86::TAILJMPd_CC;
+ break;
+ case X86::TCRETURNdi64cc:
+ assert(!MBB.getParent()->hasWinCFI() &&
+ "Conditional tail calls confuse "
+ "the Win64 unwinder.");
+ Op = X86::TAILJMPd64_CC;
+ break;
default:
// Note: Win64 uses REX prefixes on indirect jumps out of functions, but
// not direct ones.
@@ -126,13 +142,17 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
+ if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
+ MIB.addImm(MBBI->getOperand(2).getImm());
+ }
+
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
unsigned Op = (Opcode == X86::TCRETURNmi)
? X86::TAILJMPm
: (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
for (unsigned i = 0; i != 5; ++i)
- MIB.addOperand(MBBI->getOperand(i));
+ MIB.add(MBBI->getOperand(i));
} else if (Opcode == X86::TCRETURNri64) {
BuildMI(MBB, MBBI, DL,
TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
@@ -195,7 +215,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
}
for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
- MIB.addOperand(MBBI->getOperand(I));
+ MIB.add(MBBI->getOperand(I));
MBB.erase(MBBI);
return true;
}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index c890fdd1e519..036f5d2610e4 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -367,6 +367,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return false;
+ LLVM_FALLTHROUGH;
case MVT::i8:
Opc = X86::MOV8rm;
RC = &X86::GR8RegClass;
@@ -524,6 +528,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
+ bool HasSSE1 = Subtarget->hasSSE1();
bool HasSSE2 = Subtarget->hasSSE2();
bool HasSSE4A = Subtarget->hasSSE4A();
bool HasAVX = Subtarget->hasAVX();
@@ -537,6 +542,16 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
case MVT::f80: // No f80 support yet.
default: return false;
case MVT::i1: {
+ // In case ValReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
+ unsigned KValReg = ValReg;
+ ValReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ValReg)
+ .addReg(KValReg);
+ ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -574,6 +589,9 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
} else
Opc = X86::ST_Fp64m;
break;
+ case MVT::x86mmx:
+ Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
+ break;
case MVT::v4f32:
if (Aligned) {
if (IsNonTemporal)
@@ -1268,6 +1286,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (SrcVT == MVT::i1) {
if (Outs[0].Flags.isSExt())
return false;
+ // In case SrcReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
+ unsigned KSrcReg = SrcReg;
+ SrcReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg)
+ .addReg(KSrcReg);
+ SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
}
@@ -1559,6 +1587,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT == MVT::i1) {
+ // In case ResultReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
+ unsigned KResultReg = ResultReg;
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(KResultReg);
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
+
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
@@ -1740,10 +1779,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
unsigned KOpReg = OpReg;
- OpReg = createResultReg(&X86::GR8RegClass);
+ OpReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), OpReg)
.addReg(KOpReg);
+ OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(OpReg)
@@ -2084,10 +2125,12 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(&X86::GR8RegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -2297,10 +2340,12 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(&X86::GR8RegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -2423,12 +2468,22 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
if (OpReg == 0)
return false;
+ unsigned ImplicitDefReg;
+ if (Subtarget->hasAVX()) {
+ ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+ }
+
unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
ResultReg);
+
if (Subtarget->hasAVX())
- MIB.addReg(OpReg);
+ MIB.addReg(ImplicitDefReg);
+
MIB.addReg(OpReg);
updateValueMap(I, ResultReg);
return true;
@@ -2461,7 +2516,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
EVT DstVT = TLI.getValueType(DL, I->getType());
// This code only handles truncation to byte.
- if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ // TODO: Support truncate to i1 with AVX512.
+ if (DstVT != MVT::i8 && (DstVT != MVT::i1 || Subtarget->hasAVX512()))
return false;
if (!TLI.isTypeLegal(SrcVT))
return false;
@@ -3105,8 +3161,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
return 0;
if (CS)
- if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
- CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+ if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
+ CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
return 0;
return 4;
@@ -3266,6 +3322,16 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Handle zero-extension from i1 to i8, which is common.
if (ArgVT == MVT::i1) {
+ // In case SrcReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
+ unsigned KArgReg = ArgReg;
+ ArgReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg)
+ .addReg(KArgReg);
+ ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
// Set the high bits to zero.
ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
ArgVT = MVT::i8;
@@ -3463,6 +3529,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
unsigned CopyReg = ResultReg + i;
+ unsigned SrcReg = VA.getLocReg();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
@@ -3470,9 +3537,19 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
report_fatal_error("SSE register return with SSE disabled");
}
+ // If the return value is an i1 and AVX-512 is enabled, we need
+ // to do a fixup to make the copy legal.
+ if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
+ // Need to copy to a GR32 first.
+ // TODO: MOVZX isn't great here. We don't care about the upper bits.
+ SrcReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL);
+ }
+
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
- if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
CopyVT = MVT::f80;
CopyReg = createResultReg(&X86::RFP80RegClass);
@@ -3480,7 +3557,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Copy out the result.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
InRegs.push_back(VA.getLocReg());
// Round the f80 to the right size, which also moves it to the appropriate
@@ -3601,6 +3678,13 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
+ if (Subtarget->hasAVX512()) {
+ // Need to copy to a VK1 register.
+ unsigned ResultReg = createResultReg(&X86::VK1RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
+ return ResultReg;
+ }
case MVT::i8:
return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
X86::sub_8bit);
@@ -3622,7 +3706,12 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
- case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
+ case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return 0;
+ VT = MVT::i8;
+ LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
case MVT::i16: Opc = X86::MOV16ri; break;
case MVT::i32: Opc = X86::MOV32ri; break;
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index 8bde4bf98d66..c28746f96439 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -95,10 +95,9 @@ class FixupBWInstPass : public MachineFunctionPass {
// Change the MachineInstr \p MI into an equivalent 32-bit instruction if
// possible. Return the replacement instruction if OK, return nullptr
- // otherwise. Set WasCandidate to true or false depending on whether the
- // MI was a candidate for this sort of transformation.
- MachineInstr *tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB,
- bool &WasCandidate) const;
+ // otherwise.
+ MachineInstr *tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB) const;
+
public:
static char ID;
@@ -226,7 +225,7 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
unsigned NumArgs = MI->getNumOperands();
for (unsigned i = 1; i < NumArgs; ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -264,17 +263,13 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
// Drop imp-defs/uses that would be redundant with the new def/use.
for (auto &Op : MI->implicit_operands())
if (Op.getReg() != (Op.isDef() ? NewDestReg : NewSrcReg))
- MIB.addOperand(Op);
+ MIB.add(Op);
return MIB;
}
-MachineInstr *FixupBWInstPass::tryReplaceInstr(
- MachineInstr *MI, MachineBasicBlock &MBB,
- bool &WasCandidate) const {
- MachineInstr *NewMI = nullptr;
- WasCandidate = false;
-
+MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
+ MachineBasicBlock &MBB) const {
// See if this is an instruction of the type we are currently looking for.
switch (MI->getOpcode()) {
@@ -282,12 +277,9 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(
// Only replace 8-bit loads with the zero-extending versions if
// in an innermost loop and not optimizing for size. This takes
// an extra byte to encode, and provides limited performance upside.
- if (MachineLoop *ML = MLI->getLoopFor(&MBB)) {
- if (ML->begin() == ML->end() && !OptForSize) {
- NewMI = tryReplaceLoad(X86::MOVZX32rm8, MI);
- WasCandidate = true;
- }
- }
+ if (MachineLoop *ML = MLI->getLoopFor(&MBB))
+ if (ML->begin() == ML->end() && !OptForSize)
+ return tryReplaceLoad(X86::MOVZX32rm8, MI);
break;
case X86::MOV16rm:
@@ -295,9 +287,7 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(
// Code size is the same, and there is sometimes a perf advantage
// from eliminating a false dependence on the upper portion of
// the register.
- NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
- WasCandidate = true;
- break;
+ return tryReplaceLoad(X86::MOVZX32rm16, MI);
case X86::MOV8rr:
case X86::MOV16rr:
@@ -305,16 +295,14 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(
// Code size is either less (16) or equal (8), and there is sometimes a
// perf advantage from eliminating a false dependence on the upper portion
// of the register.
- NewMI = tryReplaceCopy(MI);
- WasCandidate = true;
- break;
+ return tryReplaceCopy(MI);
default:
// nothing to do here.
break;
}
- return NewMI;
+ return nullptr;
}
void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
@@ -338,18 +326,11 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
// We run after PEI, so we need to AddPristinesAndCSRs.
LiveRegs.addLiveOuts(MBB);
- bool WasCandidate = false;
-
for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
MachineInstr *MI = &*I;
- MachineInstr *NewMI = tryReplaceInstr(MI, MBB, WasCandidate);
-
- // Add this to replacements if it was a candidate, even if NewMI is
- // nullptr. We will revisit that in a bit.
- if (WasCandidate) {
+ if (MachineInstr *NewMI = tryReplaceInstr(MI, MBB))
MIReplacements.push_back(std::make_pair(MI, NewMI));
- }
// We're done with this instruction, update liveness for the next one.
LiveRegs.stepBackward(*MI);
@@ -359,9 +340,7 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
MachineInstr *MI = MIReplacements.back().first;
MachineInstr *NewMI = MIReplacements.back().second;
MIReplacements.pop_back();
- if (NewMI) {
- MBB.insert(MI, NewMI);
- MBB.erase(MI);
- }
+ MBB.insert(MI, NewMI);
+ MBB.erase(MI);
}
}
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 12095917ca30..2cd4c1a3e7b3 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -120,8 +120,8 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
BuildMI(*MF, MI.getDebugLoc(),
TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
: X86::LEA64r))
- .addOperand(Dest)
- .addOperand(Src)
+ .add(Dest)
+ .add(Src)
.addImm(1)
.addReg(0)
.addImm(0)
@@ -287,8 +287,8 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
MachineInstr *NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1));
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1));
MFI->erase(I);
I = static_cast<MachineBasicBlock::iterator>(NewMI);
return true;
@@ -377,9 +377,9 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
const MachineOperand &Src1 = MI.getOperand(SrcR1 == DstR ? 1 : 3);
const MachineOperand &Src2 = MI.getOperand(SrcR1 == DstR ? 3 : 1);
NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addrr_opcode))
- .addOperand(Dst)
- .addOperand(Src1)
- .addOperand(Src2);
+ .add(Dst)
+ .add(Src1)
+ .add(Src2);
MFI->insert(I, NewMI);
DEBUG(NewMI->dump(););
}
@@ -387,8 +387,8 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
if (MI.getOperand(4).getImm() != 0) {
const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addri_opcode))
- .addOperand(Dst)
- .addOperand(SrcR)
+ .add(Dst)
+ .add(SrcR)
.addImm(MI.getOperand(4).getImm());
MFI->insert(I, NewMI);
DEBUG(NewMI->dump(););
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index cd690442bb9f..78e0bca4158e 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -252,40 +252,76 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
int64_t NumBytes, bool InEpilogue) const {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ MachineInstr::MIFlag Flag =
+ isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
- while (Offset) {
- if (Offset > Chunk) {
- // Rather than emit a long series of instructions for large offsets,
- // load the offset into a register and do one sub/add
- unsigned Reg = 0;
+ if (Offset > Chunk) {
+ // Rather than emit a long series of instructions for large offsets,
+ // load the offset into a register and do one sub/add
+ unsigned Reg = 0;
+ unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
- if (isSub && !isEAXLiveIn(MBB))
- Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
+ if (isSub && !isEAXLiveIn(MBB))
+ Reg = Rax;
+ else
+ Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+
+ unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
+ unsigned AddSubRROpc =
+ isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
+ if (Reg) {
+ BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg)
+ .addImm(Offset)
+ .setMIFlag(Flag);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
+ .addReg(StackPtr)
+ .addReg(Reg);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ return;
+ } else if (Offset > 8 * Chunk) {
+ // If we would need more than 8 add or sub instructions (a >16GB stack
+ // frame), it's worth spilling RAX to materialize this immediate.
+ // pushq %rax
+ // movabsq +-$Offset+-SlotSize, %rax
+ // addq %rsp, %rax
+ // xchg %rax, (%rsp)
+ // movq (%rsp), %rsp
+ assert(Is64Bit && "can't have 32-bit 16GB stack frame");
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+ .addReg(Rax, RegState::Kill)
+ .setMIFlag(Flag);
+ // Subtract is not commutative, so negate the offset and always use add.
+ // Subtract 8 less and add 8 more to account for the PUSH we just did.
+ if (isSub)
+ Offset = -(Offset - SlotSize);
else
- Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
-
- if (Reg) {
- unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
- BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
- .addImm(Offset);
- Opc = isSub
- ? getSUBrrOpcode(Is64Bit)
- : getADDrrOpcode(Is64Bit);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addReg(Reg);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- Offset = 0;
- continue;
- }
+ Offset = Offset + SlotSize;
+ BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax)
+ .addImm(Offset)
+ .setMIFlag(Flag);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
+ .addReg(Rax)
+ .addReg(StackPtr);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ // Exchange the new SP in RAX with the top of the stack.
+ addRegOffset(
+ BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
+ StackPtr, false, 0);
+ // Load new SP from the top of the stack into RSP.
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
+ StackPtr, false, 0);
+ return;
}
+ }
+ while (Offset) {
uint64_t ThisVal = std::min(Offset, Chunk);
- if (ThisVal == (Is64Bit ? 8 : 4)) {
- // Use push / pop instead.
+ if (ThisVal == SlotSize) {
+ // Use push / pop for slot-sized adjustments as a size optimization. We
+ // need to find a dead register when using pop.
unsigned Reg = isSub
? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
: findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
@@ -293,23 +329,16 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
unsigned Opc = isSub
? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
: (Is64Bit ? X86::POP64r : X86::POP32r);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
- .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
- if (isSub)
- MI->setFlag(MachineInstr::FrameSetup);
- else
- MI->setFlag(MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
+ .setMIFlag(Flag);
Offset -= ThisVal;
continue;
}
}
- MachineInstrBuilder MI = BuildStackAdjustment(
- MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue);
- if (isSub)
- MI.setMIFlag(MachineInstr::FrameSetup);
- else
- MI.setMIFlag(MachineInstr::FrameDestroy);
+ BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
+ .setMIFlag(Flag);
Offset -= ThisVal;
}
@@ -959,6 +988,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.getValueAsString()
.getAsInteger(0, StackProbeSize);
+ // Re-align the stack on 64-bit if the x86-interrupt calling convention is
+ // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
+ // stack alignment.
+ if (Fn->getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
+ Fn->arg_size() == 2) {
+ StackSize += 8;
+ MFI.setStackSize(StackSize);
+ emitSPUpdate(MBB, MBBI, -8, /*InEpilogue=*/false);
+ }
+
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
@@ -2587,8 +2626,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
- uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
+ uint64_t Amount = !reserveCallFrame ? TII.getFrameSize(*I) : 0;
+ uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
I = MBB.erase(I);
auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
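A minimal standalone sketch of the huge-frame offset rewrite used by the new > 8*Chunk path in emitSPUpdate() above; it is illustrative only (plain C++, hypothetical 20 GiB frame size, not part of the patch). After the pushq of RAX the stack pointer has already dropped by one slot, so the immediate materialized into RAX is negated and reduced by SlotSize before the single addq/xchg/movq sequence restores RSP:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint64_t SlotSize = 8;              // x86-64 stack slot
    const uint64_t Chunk = (1ULL << 31) - 1;  // same chunk limit as emitSPUpdate
    uint64_t Offset = 20ULL << 30;            // hypothetical 20 GiB allocation (isSub case)
    if (Offset > 8 * Chunk) {
      // "pushq %rax" already moved RSP down by SlotSize, so shrink the amount
      // by SlotSize and negate it so a plain add produces the final SP:
      //   movabsq $Imm, %rax ; addq %rsp, %rax ; xchg %rax,(%rsp) ; movq (%rsp),%rsp
      int64_t Imm = -(int64_t)(Offset - SlotSize);
      std::printf("movabsq $%lld, %%rax\n", (long long)Imm);
    }
    return 0;
  }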
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index e1b04d6dc300..863dc8b22968 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -20,6 +20,7 @@ namespace llvm {
class MachineInstrBuilder;
class MCCFIInstruction;
+class X86InstrInfo;
class X86Subtarget;
class X86RegisterInfo;
@@ -30,7 +31,7 @@ public:
// Cached subtarget predicates.
const X86Subtarget &STI;
- const TargetInstrInfo &TII;
+ const X86InstrInfo &TII;
const X86RegisterInfo *TRI;
unsigned SlotSize;
diff --git a/lib/Target/X86/X86GenRegisterBankInfo.def b/lib/Target/X86/X86GenRegisterBankInfo.def
new file mode 100644
index 000000000000..06be142432f7
--- /dev/null
+++ b/lib/Target/X86/X86GenRegisterBankInfo.def
@@ -0,0 +1,104 @@
+//===- X86GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines all the static objects used by X86RegisterBankInfo.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
+RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{
+ /* StartIdx, Length, RegBank */
+ // GPR value
+ {0, 8, X86::GPRRegBank}, // :0
+ {0, 16, X86::GPRRegBank}, // :1
+ {0, 32, X86::GPRRegBank}, // :2
+ {0, 64, X86::GPRRegBank}, // :3
+ // FR32/64, xmm registers
+ {0, 32, X86::VECRRegBank}, // :4
+ {0, 64, X86::VECRRegBank}, // :5
+ // VR128/256/512
+ {0, 128, X86::VECRRegBank}, // :6
+ {0, 256, X86::VECRRegBank}, // :7
+ {0, 512, X86::VECRRegBank}, // :8
+};
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+
+#ifdef GET_TARGET_REGBANK_INFO_CLASS
+enum PartialMappingIdx {
+ PMI_None = -1,
+ PMI_GPR8,
+ PMI_GPR16,
+ PMI_GPR32,
+ PMI_GPR64,
+ PMI_FP32,
+ PMI_FP64,
+ PMI_VEC128,
+ PMI_VEC256,
+ PMI_VEC512
+};
+#endif // GET_TARGET_REGBANK_INFO_CLASS
+
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
+#define INSTR_3OP(INFO) INFO, INFO, INFO,
+#define BREAKDOWN(INDEX, NUM) \
+ { &X86GenRegisterBankInfo::PartMappings[INDEX], NUM }
+// ValueMappings.
+RegisterBankInfo::ValueMapping X86GenRegisterBankInfo::ValMappings[]{
+ /* BreakDown, NumBreakDowns */
+ // 3-operand instructions (all binary operations should end up with one of
+ // those mappings).
+ INSTR_3OP(BREAKDOWN(PMI_GPR8, 1)) // 0: GPR_8
+ INSTR_3OP(BREAKDOWN(PMI_GPR16, 1)) // 3: GPR_16
+ INSTR_3OP(BREAKDOWN(PMI_GPR32, 1)) // 6: GPR_32
+ INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64
+ INSTR_3OP(BREAKDOWN(PMI_FP32, 1)) // 12: Fp32
+ INSTR_3OP(BREAKDOWN(PMI_FP64, 1)) // 15: Fp64
+ INSTR_3OP(BREAKDOWN(PMI_VEC128, 1)) // 18: Vec128
+ INSTR_3OP(BREAKDOWN(PMI_VEC256, 1)) // 21: Vec256
+ INSTR_3OP(BREAKDOWN(PMI_VEC512, 1)) // 24: Vec512
+};
+#undef INSTR_3OP
+#undef BREAKDOWN
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+
+#ifdef GET_TARGET_REGBANK_INFO_CLASS
+enum ValueMappingIdx {
+ VMI_None = -1,
+ VMI_3OpsGpr8Idx = PMI_GPR8 * 3,
+ VMI_3OpsGpr16Idx = PMI_GPR16 * 3,
+ VMI_3OpsGpr32Idx = PMI_GPR32 * 3,
+ VMI_3OpsGpr64Idx = PMI_GPR64 * 3,
+ VMI_3OpsFp32Idx = PMI_FP32 * 3,
+ VMI_3OpsFp64Idx = PMI_FP64 * 3,
+ VMI_3OpsVec128Idx = PMI_VEC128 * 3,
+ VMI_3OpsVec256Idx = PMI_VEC256 * 3,
+ VMI_3OpsVec512Idx = PMI_VEC512 * 3,
+};
+#undef GET_TARGET_REGBANK_INFO_CLASS
+#endif // GET_TARGET_REGBANK_INFO_CLASS
+
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
+#undef GET_TARGET_REGBANK_INFO_IMPL
+const RegisterBankInfo::ValueMapping *
+X86GenRegisterBankInfo::getValueMapping(PartialMappingIdx Idx,
+ unsigned NumOperands) {
+
+ // We can use the VMI_3Ops mappings for all these cases.
+ if (NumOperands <= 3 && (Idx >= PMI_GPR8 && Idx <= PMI_VEC512))
+ return &ValMappings[(unsigned)Idx * 3];
+
+ llvm_unreachable("Unsupported PartialMappingIdx.");
+}
+
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+
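The Idx * 3 indexing in getValueMapping() above works because INSTR_3OP emits three identical ValueMapping entries per PartialMappingIdx, one per operand of a binary operation. A standalone mirror of that layout, using hypothetical stand-in types with no LLVM dependencies, sketches the lookup:

  #include <cassert>
  #include <cstdio>

  // Stand-ins for the generated tables; the indices mirror PartialMappingIdx
  // above (GPR8 = 0, GPR16 = 1, ...). The real table continues through the
  // FP and vector banks.
  enum Idx { GPR8, GPR16, GPR32, GPR64 };
  struct ValueMapping { Idx Part; };

  // Each PartialMappingIdx owns three consecutive entries: dst, src0, src1.
  static const ValueMapping ValMappings[] = {
      {GPR8},  {GPR8},  {GPR8},
      {GPR16}, {GPR16}, {GPR16},
      {GPR32}, {GPR32}, {GPR32},
      {GPR64}, {GPR64}, {GPR64}};

  static const ValueMapping *getMapping(Idx I, unsigned NumOperands) {
    assert(NumOperands <= 3 && "only 3-operand shapes are tabulated");
    return &ValMappings[(unsigned)I * 3]; // same indexing as the real helper
  }

  int main() {
    const ValueMapping *VM = getMapping(GPR32, 3); // e.g. a 32-bit binary op
    std::printf("operand 0 uses partial mapping %d\n", (int)VM->Part);
    return 0;
  }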
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8ab4c0616880..eb5c56ff2ff9 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -188,7 +188,6 @@ namespace {
private:
void Select(SDNode *N) override;
- bool tryGather(SDNode *N, unsigned Opc);
bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -384,6 +383,16 @@ namespace {
bool ComplexPatternFuncMutatesDAG() const override {
return true;
}
+
+ bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
+
+ /// Returns whether this is a relocatable immediate in the range
+ /// [-2^Width .. 2^Width-1].
+ template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
+ if (auto *CN = dyn_cast<ConstantSDNode>(N))
+ return isInt<Width>(CN->getSExtValue());
+ return isSExtAbsoluteSymbolRef(Width, N);
+ }
};
}
@@ -709,7 +718,8 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
- Subtarget->isTargetGlibc())
+ (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
+ Subtarget->isTargetFuchsia()))
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -1325,8 +1335,8 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
- insertDAGNode(*CurDAG, N, Zero);
- insertDAGNode(*CurDAG, N, Neg);
+ insertDAGNode(*CurDAG, Handle.getValue(), Zero);
+ insertDAGNode(*CurDAG, Handle.getValue(), Neg);
return false;
}
@@ -1789,6 +1799,21 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
}
+bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
+ if (N->getOpcode() == ISD::TRUNCATE)
+ N = N->getOperand(0).getNode();
+ if (N->getOpcode() != X86ISD::Wrapper)
+ return false;
+
+ auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
+ if (!GA)
+ return false;
+
+ Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
+ return CR && CR->getSignedMin().sge(-1ull << Width) &&
+ CR->getSignedMax().slt(1ull << Width);
+}
+
/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// or OF bits to be accurate.
static bool hasNoSignedComparisonUses(SDNode *N) {
@@ -1905,6 +1930,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue Op = Chain.getOperand(i);
if (Op == Load.getValue(1)) {
ChainCheck = true;
+ // Drop Load, but keep its chain. No cycle check necessary.
+ ChainOps.push_back(Load.getOperand(0));
continue;
}
@@ -1954,39 +1981,6 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
llvm_unreachable("unrecognized size for LdVT");
}
-/// Customized ISel for GATHER operations.
-bool X86DAGToDAGISel::tryGather(SDNode *Node, unsigned Opc) {
- // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
- SDValue Chain = Node->getOperand(0);
- SDValue VSrc = Node->getOperand(2);
- SDValue Base = Node->getOperand(3);
- SDValue VIdx = Node->getOperand(4);
- SDValue VMask = Node->getOperand(5);
- ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
- if (!Scale)
- return false;
-
- SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
- MVT::Other);
-
- SDLoc DL(Node);
-
- // Memory Operands: Base, Scale, Index, Disp, Segment
- SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- SDValue Segment = CurDAG->getRegister(0, MVT::i32);
- const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx,
- Disp, Segment, VMask, Chain};
- SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
- // Node has 2 outputs: VDst and MVT::Other.
- // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
- // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
- // of ResNode.
- ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
- ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
- CurDAG->RemoveDeadNode(Node);
- return true;
-}
-
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opc, MOpc;
@@ -2024,55 +2018,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- switch (IntNo) {
- default: break;
- case Intrinsic::x86_avx2_gather_d_pd:
- case Intrinsic::x86_avx2_gather_d_pd_256:
- case Intrinsic::x86_avx2_gather_q_pd:
- case Intrinsic::x86_avx2_gather_q_pd_256:
- case Intrinsic::x86_avx2_gather_d_ps:
- case Intrinsic::x86_avx2_gather_d_ps_256:
- case Intrinsic::x86_avx2_gather_q_ps:
- case Intrinsic::x86_avx2_gather_q_ps_256:
- case Intrinsic::x86_avx2_gather_d_q:
- case Intrinsic::x86_avx2_gather_d_q_256:
- case Intrinsic::x86_avx2_gather_q_q:
- case Intrinsic::x86_avx2_gather_q_q_256:
- case Intrinsic::x86_avx2_gather_d_d:
- case Intrinsic::x86_avx2_gather_d_d_256:
- case Intrinsic::x86_avx2_gather_q_d:
- case Intrinsic::x86_avx2_gather_q_d_256: {
- if (!Subtarget->hasAVX2())
- break;
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
- case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
- case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
- case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
- case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
- case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
- case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
- case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
- case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
- case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
- case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
- case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
- case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
- case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
- case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
- case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
- case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
- }
- if (tryGather(Node, Opc))
- return;
- break;
- }
- }
- break;
- }
case X86ISD::GlobalBaseReg:
ReplaceNode(Node, getGlobalBaseReg());
return;
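The new isSExtRelocImm<Width>/isSExtAbsoluteSymbolRef pair earlier in this file folds an absolute symbol into a sign-extended immediate only when its entire !absolute_symbol range fits in [-2^Width, 2^Width). A standalone mirror of that containment test (hypothetical helper name and sample values, not part of the patch):

  #include <cstdint>
  #include <cstdio>

  // Mirrors the range check in isSExtAbsoluteSymbolRef(): a symbol known to
  // lie in [Min, Max] may be encoded as a Width-bit sign-extended immediate
  // only if the whole interval fits in [-2^Width, 2^Width).
  static bool fitsSExtWindow(int64_t Min, int64_t Max, unsigned Width) {
    const int64_t Lo = -(INT64_C(1) << Width);
    const int64_t Hi = INT64_C(1) << Width; // exclusive upper bound
    return Min >= Lo && Max < Hi;
  }

  int main() {
    std::printf("%d\n", fitsSExtWindow(-128, 127, 7));    // 1: range fits the window
    std::printf("%d\n", fitsSExtWindow(0, 1 << 20, 16));  // 0: range is too wide
    return 0;
  }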
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 08fe2bad281e..7ff483063ec2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
@@ -70,6 +71,13 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
"rather than promotion."),
cl::Hidden);
+static cl::opt<int> ExperimentalPrefLoopAlignment(
+ "x86-experimental-pref-loop-alignment", cl::init(4),
+ cl::desc("Sets the preferable loop alignment for experiments "
+ "(the last x86-experimental-pref-loop-alignment bits"
+ " of the loop header PC will be 0)."),
+ cl::Hidden);
+
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -427,7 +435,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ExternalSymbol , VT, Custom);
setOperationAction(ISD::BlockAddress , VT, Custom);
}
- // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
+
+ // 64-bit shl, sra, srl (iff 32-bit x86)
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
@@ -782,6 +791,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -888,6 +898,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
+ setOperationAction(ISD::ABS, MVT::v16i8, Legal);
+ setOperationAction(ISD::ABS, MVT::v8i16, Legal);
+ setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
@@ -922,6 +935,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
+
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
@@ -1065,6 +1086,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
+ setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
@@ -1126,7 +1148,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
@@ -1271,6 +1293,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasVLX()) {
+ setOperationAction(ISD::ABS, MVT::v4i64, Legal);
+ setOperationAction(ISD::ABS, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1357,16 +1381,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
- setOperationAction(ISD::ADD, MVT::v8i1, Expand);
- setOperationAction(ISD::ADD, MVT::v16i1, Expand);
- setOperationAction(ISD::SUB, MVT::v8i1, Expand);
- setOperationAction(ISD::SUB, MVT::v16i1, Expand);
- setOperationAction(ISD::MUL, MVT::v8i1, Expand);
- setOperationAction(ISD::MUL, MVT::v16i1, Expand);
+ setOperationAction(ISD::ADD, MVT::v8i1, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i1, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -1441,7 +1466,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MGATHER, VT, Legal);
@@ -1460,12 +1485,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
- setOperationAction(ISD::ADD, MVT::v32i1, Expand);
- setOperationAction(ISD::ADD, MVT::v64i1, Expand);
- setOperationAction(ISD::SUB, MVT::v32i1, Expand);
- setOperationAction(ISD::SUB, MVT::v64i1, Expand);
- setOperationAction(ISD::MUL, MVT::v32i1, Expand);
- setOperationAction(ISD::MUL, MVT::v64i1, Expand);
+ setOperationAction(ISD::ADD, MVT::v32i1, Custom);
+ setOperationAction(ISD::ADD, MVT::v64i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v64i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v32i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v64i1, Custom);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
@@ -1479,8 +1504,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
@@ -1546,6 +1571,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -1574,9 +1600,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
- setOperationAction(ISD::ADD, VT, Expand);
- setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::MUL, VT, Expand);
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::SUB, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -1671,6 +1697,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
@@ -1696,6 +1723,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
+ setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1712,7 +1741,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
- setPrefLoopAlignment(4); // 2^4 bytes.
+ // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
+ setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
@@ -1933,6 +1963,34 @@ bool X86TargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
+void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
+ ArgListTy &Args) const {
+
+ // Only relabel X86-32 for C / Stdcall CCs.
+ if (Subtarget.is64Bit())
+ return;
+ if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
+ return;
+ unsigned ParamRegs = 0;
+ if (auto *M = MF->getFunction()->getParent())
+ ParamRegs = M->getNumberRegisterParameters();
+
+ // Mark the first N int arguments as being passed in registers.
+ for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
+ Type *T = Args[Idx].Ty;
+ if (T->isPointerTy() || T->isIntegerTy())
+ if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
+ unsigned numRegs = 1;
+ if (MF->getDataLayout().getTypeAllocSize(T) > 4)
+ numRegs = 2;
+ if (ParamRegs < numRegs)
+ return;
+ ParamRegs -= numRegs;
+ Args[Idx].IsInReg = true;
+ }
+ }
+}
+
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
@@ -2001,21 +2059,37 @@ unsigned X86TargetLowering::getAddressSpace() const {
return 256;
}
-Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
- // glibc has a special slot for the stack guard in tcbhead_t, use it instead
- // of the usual global variable (see sysdeps/{i386,x86_64}/nptl/tls.h)
- if (!Subtarget.isTargetGlibc())
- return TargetLowering::getIRStackGuard(IRB);
-
- // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
- // %gs:0x14 on i386
- unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
- unsigned AddressSpace = getAddressSpace();
+static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
+ return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
+ (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
+}
+
+static Constant* SegmentOffset(IRBuilder<> &IRB,
+ unsigned Offset, unsigned AddressSpace) {
return ConstantExpr::getIntToPtr(
ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
}
+Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
+ // glibc, bionic, and Fuchsia have a special slot for the stack guard in
+ // tcbhead_t; use it instead of the usual global variable (see
+ // sysdeps/{i386,x86_64}/nptl/tls.h)
+ if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
+ if (Subtarget.isTargetFuchsia()) {
+ // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
+ return SegmentOffset(IRB, 0x10, getAddressSpace());
+ } else {
+ // %fs:0x28, unless we're using a Kernel code model, in which case
+ // it's %gs:0x28. %gs:0x14 on i386.
+ unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
+ return SegmentOffset(IRB, Offset, getAddressSpace());
+ }
+ }
+
+ return TargetLowering::getIRStackGuard(IRB);
+}
+
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget.getTargetTriple().isOSMSVCRT()) {
@@ -2027,13 +2101,13 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
auto *SecurityCheckCookie = cast<Function>(
M.getOrInsertFunction("__security_check_cookie",
Type::getVoidTy(M.getContext()),
- Type::getInt8PtrTy(M.getContext()), nullptr));
+ Type::getInt8PtrTy(M.getContext())));
SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
return;
}
- // glibc has a special slot for the stack guard.
- if (Subtarget.isTargetGlibc())
+ // glibc, bionic, and Fuchsia have a special slot for the stack guard.
+ if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
return;
TargetLowering::insertSSPDeclarations(M);
}
@@ -2056,21 +2130,23 @@ Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
if (Subtarget.getTargetTriple().isOSContiki())
return getDefaultSafeStackPointerLocation(IRB, false);
- if (!Subtarget.isTargetAndroid())
- return TargetLowering::getSafeStackPointerLocation(IRB);
-
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- unsigned AddressSpace, Offset;
+ if (Subtarget.isTargetAndroid()) {
+ // %fs:0x48, unless we're using a Kernel code model, in which case it uses
+ // %gs instead. %gs:0x24 on i386.
+ unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
+ return SegmentOffset(IRB, Offset, getAddressSpace());
+ }
- // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
- // %gs:0x24 on i386
- Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
- AddressSpace = getAddressSpace();
- return ConstantExpr::getIntToPtr(
- ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
- Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
+ // Fuchsia is similar.
+ if (Subtarget.isTargetFuchsia()) {
+ // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
+ return SegmentOffset(IRB, 0x18, getAddressSpace());
+ }
+
+ return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -2179,6 +2255,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
++I, ++OutsIndex) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Add the register to the CalleeSaveDisableRegs list.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
+
SDValue ValToCopy = OutVals[OutsIndex];
EVT ValVT = ValToCopy.getValueType();
@@ -2253,6 +2334,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(2 == RegsToPass.size() &&
"Expecting two registers after Pass64BitArgInRegs");
+
+ // Add the second register to the CalleeSaveDisableRegs list.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
} else {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
}
@@ -2309,6 +2394,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// RAX/EAX now acts like a return value.
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+
+ // Add the returned register to the CalleeSaveDisableRegs list.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
}
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -2444,7 +2533,7 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
// Convert the i32 type into v32i1 type
Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
- // Concantenate the two values together
+ // Concatenate the two values together
return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
}
@@ -2488,8 +2577,10 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
SDValue X86TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ uint32_t *RegMask) const {
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget.is64Bit();
@@ -2503,6 +2594,14 @@ SDValue X86TargetLowering::LowerCallResult(
CCValAssign &VA = RVLocs[I];
EVT CopyVT = VA.getLocVT();
+ // In some calling conventions we need to remove the used registers
+ // from the register mask.
+ if (RegMask && CallConv == CallingConv::X86_RegCall) {
+ for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+ }
+
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
@@ -2669,6 +2768,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
// If value is passed by pointer we have address passed instead of the value
// itself. No need to extend if the mask value and location share the same
@@ -2686,13 +2786,16 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
// taken by a return address.
int Offset = 0;
if (CallConv == CallingConv::X86_INTR) {
- const X86Subtarget& Subtarget =
- static_cast<const X86Subtarget&>(DAG.getSubtarget());
// X86 interrupts may take one or two arguments.
// On the stack there will be no return address as in a regular call.
// The offset of the last argument needs to be set to -4/-8 bytes.
// The offset of the first of two arguments should be set to 0 bytes.
Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
+ if (Subtarget.is64Bit() && Ins.size() == 2) {
+ // The stack pointer needs to be realigned for 64 bit handlers with error
+ // code, so the argument offset changes by 8 bytes.
+ Offset += 8;
+ }
}
// FIXME: For now, all byval parameter objects are marked mutable. This can be
@@ -2707,30 +2810,71 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
- return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- } else {
- int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable);
-
- // Set SExt or ZExt flag.
- if (VA.getLocInfo() == CCValAssign::ZExt) {
- MFI.setObjectZExt(FI, true);
- } else if (VA.getLocInfo() == CCValAssign::SExt) {
- MFI.setObjectSExt(FI, true);
+ return DAG.getFrameIndex(FI, PtrVT);
+ }
+
+ // This is an argument in memory. We might be able to perform copy elision.
+ if (Flags.isCopyElisionCandidate()) {
+ EVT ArgVT = Ins[i].ArgVT;
+ SDValue PartAddr;
+ if (Ins[i].PartOffset == 0) {
+ // If this is a one-part value or the first part of a multi-part value,
+ // create a stack object for the entire argument value type and return a
+ // load from our portion of it. This assumes that if the first part of an
+ // argument is in memory, the rest will also be in memory.
+ int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
+ /*Immutable=*/false);
+ PartAddr = DAG.getFrameIndex(FI, PtrVT);
+ return DAG.getLoad(
+ ValVT, dl, Chain, PartAddr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ } else {
+ // This is not the first piece of an argument in memory. See if there is
+ // already a fixed stack object including this offset. If so, assume it
+ // was created by the PartOffset == 0 branch above and create a load from
+ // the appropriate offset into it.
+ int64_t PartBegin = VA.getLocMemOffset();
+ int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
+ int FI = MFI.getObjectIndexBegin();
+ for (; MFI.isFixedObjectIndex(FI); ++FI) {
+ int64_t ObjBegin = MFI.getObjectOffset(FI);
+ int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
+ if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
+ break;
+ }
+ if (MFI.isFixedObjectIndex(FI)) {
+ SDValue Addr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
+ DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
+ return DAG.getLoad(
+ ValVT, dl, Chain, Addr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
+ Ins[i].PartOffset));
+ }
}
+ }
- // Adjust SP offset of interrupt parameter.
- if (CallConv == CallingConv::X86_INTR) {
- MFI.setObjectOffset(FI, Offset);
- }
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), isImmutable);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Val = DAG.getLoad(
- ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- return ExtendedInMem ?
- DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
+ // Set SExt or ZExt flag.
+ if (VA.getLocInfo() == CCValAssign::ZExt) {
+ MFI.setObjectZExt(FI, true);
+ } else if (VA.getLocInfo() == CCValAssign::SExt) {
+ MFI.setObjectSExt(FI, true);
+ }
+
+ // Adjust SP offset of interrupt parameter.
+ if (CallConv == CallingConv::X86_INTR) {
+ MFI.setObjectOffset(FI, Offset);
}
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getLoad(
+ ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
+ : Val;
}
// FIXME: Get this from tablegen.
@@ -2781,12 +2925,14 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
+#ifndef NDEBUG
static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
[](const CCValAssign &A, const CCValAssign &B) -> bool {
return A.getValNo() < B.getValNo();
});
}
+#endif
SDValue X86TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -2836,8 +2982,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
// The next loop assumes that the locations are in the same order of the
// input arguments.
- if (!isSortedByValueNo(ArgLocs))
- llvm_unreachable("Argument Location list must be sorted before lowering");
+ assert(isSortedByValueNo(ArgLocs) &&
+ "Argument Location list must be sorted before lowering");
SDValue ArgValue;
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
@@ -2853,7 +2999,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
"Currently the only custom case is when we split v64i1 to 2 regs");
// v64i1 values, in regcall calling convention, that are
- // compiled to 32 bit arch, are splited up into two registers.
+ // compiled to 32 bit arch, are split up into two registers.
ArgValue =
getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
} else {
@@ -3107,8 +3253,9 @@ SDValue X86TargetLowering::LowerFormalArguments(
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
- // X86 interrupts must pop the error code if present
- FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
+ // X86 interrupts must pop the error code (and the alignment padding) if
+ // present.
+ FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
@@ -3146,6 +3293,12 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
}
+ if (CallConv == CallingConv::X86_RegCall) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
+ MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
+ }
+
return Chain;
}
@@ -3348,8 +3501,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The next loop assumes that the locations are in the same order of the
// input arguments.
- if (!isSortedByValueNo(ArgLocs))
- llvm_unreachable("Argument Location list must be sorted before lowering");
+ assert(isSortedByValueNo(ArgLocs) &&
+ "Argument Location list must be sorted before lowering");
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
@@ -3517,7 +3670,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert((CallConv == CallingConv::X86_RegCall) &&
- "Expecting custome case only in regcall calling convention");
+ "Expecting custom case only in regcall calling convention");
// This means that we are in special case where one argument was
// passed through two register locations - Skip the next location
++I;
@@ -3662,7 +3815,32 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Mask = RegInfo->getNoPreservedMask();
}
- Ops.push_back(DAG.getRegisterMask(Mask));
+ // Define a new register mask from the existing mask.
+ uint32_t *RegMask = nullptr;
+
+ // In some calling conventions we need to remove the used physical registers
+ // from the reg mask.
+ if (CallConv == CallingConv::X86_RegCall) {
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ // Allocate a new Reg Mask and copy Mask.
+ RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+ memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
+
+ // Make sure all sub registers of the argument registers are reset
+ // in the RegMask.
+ for (auto const &RegPair : RegsToPass)
+ for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+
+ // Create the RegMask Operand according to our updated mask.
+ Ops.push_back(DAG.getRegisterMask(RegMask));
+ } else {
+ // Create the RegMask Operand according to the static mask.
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -3715,8 +3893,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, RegMask);
}
//===----------------------------------------------------------------------===//
@@ -4132,6 +4310,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
return true;
// 'Faux' Target Shuffles.
case ISD::AND:
+ case X86ISD::ANDNP:
return true;
}
}
@@ -4448,6 +4627,11 @@ bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}
+bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ return true;
+}
+
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (!Subtarget.hasBMI())
return false;
@@ -4460,6 +4644,26 @@ bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
return true;
}
+MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
+ MVT VT = MVT::getIntegerVT(NumBits);
+ if (isTypeLegal(VT))
+ return VT;
+
+ // PMOVMSKB can handle this.
+ if (NumBits == 128 && isTypeLegal(MVT::v16i8))
+ return MVT::v16i8;
+
+ // VPMOVMSKB can handle this.
+ if (NumBits == 256 && isTypeLegal(MVT::v32i8))
+ return MVT::v32i8;
+
+ // TODO: Allow 64-bit type for 32-bit target.
+ // TODO: 512-bit types should be allowed, but make sure that those
+ // cases are handled in combineVectorSizedSetCCEquality().
+
+ return MVT::INVALID_SIMPLE_VALUE_TYPE;
+}
+
/// Val is the undef sentinel value or equal to the specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return ((Val == SM_SentinelUndef) || (Val == CmpVal));
@@ -4555,28 +4759,30 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
SmallVectorImpl<int> &WidenedMask) {
WidenedMask.assign(Mask.size() / 2, 0);
for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
+ int M0 = Mask[i];
+ int M1 = Mask[i + 1];
+
// If both elements are undef, it's trivial.
- if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
+ if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
WidenedMask[i / 2] = SM_SentinelUndef;
continue;
}
// Check for an undef mask and a mask value properly aligned to fit with
// a pair of values. If we find such a case, use the non-undef mask's value.
- if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 &&
- Mask[i + 1] % 2 == 1) {
- WidenedMask[i / 2] = Mask[i + 1] / 2;
+ if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
+ WidenedMask[i / 2] = M1 / 2;
continue;
}
- if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
- WidenedMask[i / 2] = Mask[i] / 2;
+ if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
+ WidenedMask[i / 2] = M0 / 2;
continue;
}
// When zeroing, we need to spread the zeroing across both lanes to widen.
- if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
- if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
- (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
+ if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
+ if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
+ (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
WidenedMask[i / 2] = SM_SentinelZero;
continue;
}
@@ -4585,9 +4791,8 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
// Finally check if the two mask values are adjacent and aligned with
// a pair.
- if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 &&
- Mask[i] + 1 == Mask[i + 1]) {
- WidenedMask[i / 2] = Mask[i] / 2;
+ if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
+ WidenedMask[i / 2] = M0 / 2;
continue;
}
@@ -4770,9 +4975,10 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
return ConstsNode;
}
-static SDValue getConstVector(ArrayRef<APInt> Bits, SmallBitVector &Undefs,
+static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
- assert(Bits.size() == Undefs.size() && "Unequal constant and undef arrays");
+ assert(Bits.size() == Undefs.getBitWidth() &&
+ "Unequal constant and undef arrays");
SmallVector<SDValue, 32> Ops;
bool Split = false;
@@ -4844,10 +5050,6 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
VT.getVectorNumElements()/Factor);
- // Extract from UNDEF is UNDEF.
- if (Vec.isUndef())
- return DAG.getUNDEF(ResultVT);
-
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
@@ -4918,50 +5120,6 @@ static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
-
- // For insertion into the zero index (low half) of a 256-bit vector, it is
- // more efficient to generate a blend with immediate instead of an insert*128.
- // We are still creating an INSERT_SUBVECTOR below with an undef node to
- // extend the subvector to the size of the result vector. Make sure that
- // we are not recursing on that node by checking for undef here.
- if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
- !Result.isUndef()) {
- EVT ResultVT = Result.getValueType();
- SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl);
- SDValue Undef = DAG.getUNDEF(ResultVT);
- SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
- Vec, ZeroIndex);
-
- // The blend instruction, and therefore its mask, depend on the data type.
- MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT();
- if (ScalarType.isFloatingPoint()) {
- // Choose either vblendps (float) or vblendpd (double).
- unsigned ScalarSize = ScalarType.getSizeInBits();
- assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
- unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
- SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
- return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
- }
-
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
-
- // AVX2 is needed for 256-bit integer blend support.
- // Integers must be cast to 32-bit because there is only vpblendd;
- // vpblendw can't be used for this because it has a handicapped mask.
-
- // If we don't have AVX2, then cast to float. Using a wrong domain blend
- // is still more efficient than using the wrong domain vinsertf128 that
- // will be created by InsertSubVector().
- MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
-
- SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
- Result = DAG.getBitcast(CastVT, Result);
- Vec256 = DAG.getBitcast(CastVT, Vec256);
- Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
- return DAG.getBitcast(ResultVT, Vec256);
- }
-
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
@@ -5023,7 +5181,8 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (Vec.isUndef()) {
if (IdxVal != 0) {
SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
- WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec, ShiftBits);
+ WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
+ ShiftBits);
}
return ExtractSubVec(WideSubVec);
}
@@ -5032,9 +5191,9 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec,
- DAG.getConstant(ShiftLeft, dl, MVT::i8));
- Vec = ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
return ExtractSubVec(Vec);
}
@@ -5043,8 +5202,8 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
getZeroVector(WideOpVT, Subtarget, DAG, dl),
@@ -5056,12 +5215,12 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Simple case when we put subvector in the upper part
if (IdxVal + SubVecNumElems == NumElems) {
// Zero upper bits of the Vec
- WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec,
+ WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
return ExtractSubVec(Vec);
}
@@ -5094,26 +5253,38 @@ static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
}
/// Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
-/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
+/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
- SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
APInt Ones = APInt::getAllOnesValue(32);
unsigned NumElts = VT.getSizeInBits() / 32;
- SDValue Vec;
- if (!Subtarget.hasInt256() && NumElts == 8) {
- Vec = DAG.getConstant(Ones, dl, MVT::v4i32);
- Vec = concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
- } else {
- Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
- }
+ SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
}
+static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
+ SelectionDAG &DAG) {
+ EVT InVT = In.getValueType();
+ assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
+
+ if (VT.is128BitVector() && InVT.is128BitVector())
+ return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
+ : DAG.getZeroExtendVectorInReg(In, DL, VT);
+
+ // For 256-bit vectors, we only need the lower (128-bit) input half.
+ // For 512-bit vectors, we only need the lower input half or quarter.
+ if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
+ int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
+ In = extractSubVector(In, 0, DAG, DL,
+ std::max(128, (int)VT.getSizeInBits() / Scale));
+ }
+
+ return DAG.getNode(Opc, DL, VT, In);
+}
+
/// Generate unpacklo/unpackhi shuffle mask.
static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
bool Unary) {
@@ -5199,9 +5370,10 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
// Extract raw constant bits from constant pools.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
- SmallBitVector &UndefElts,
- SmallVectorImpl<APInt> &EltBits) {
- assert(UndefElts.empty() && "Expected an empty UndefElts vector");
+ APInt &UndefElts,
+ SmallVectorImpl<APInt> &EltBits,
+ bool AllowWholeUndefs = true,
+ bool AllowPartialUndefs = true) {
assert(EltBits.empty() && "Expected an empty EltBits vector");
Op = peekThroughBitcasts(Op);
@@ -5211,56 +5383,83 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
unsigned NumElts = SizeInBits / EltSizeInBits;
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(SizeInBits, 0);
APInt MaskBits(SizeInBits, 0);
// Split the undef/constant single bitset data into the target elements.
auto SplitBitData = [&]() {
- UndefElts = SmallBitVector(NumElts, false);
+ // Don't split if we don't allow undef bits.
+ bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
+ if (UndefBits.getBoolValue() && !AllowUndefs)
+ return false;
+
+ UndefElts = APInt(NumElts, 0);
EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
for (unsigned i = 0; i != NumElts; ++i) {
- APInt UndefEltBits = UndefBits.lshr(i * EltSizeInBits);
- UndefEltBits = UndefEltBits.zextOrTrunc(EltSizeInBits);
+ unsigned BitOffset = i * EltSizeInBits;
+ APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
- // Only treat an element as UNDEF if all bits are UNDEF, otherwise
- // treat it as zero.
+ // Only treat an element as UNDEF if all bits are UNDEF.
if (UndefEltBits.isAllOnesValue()) {
- UndefElts[i] = true;
+ if (!AllowWholeUndefs)
+ return false;
+ UndefElts.setBit(i);
continue;
}
- APInt Bits = MaskBits.lshr(i * EltSizeInBits);
- Bits = Bits.zextOrTrunc(EltSizeInBits);
+ // If only some bits are UNDEF then treat them as zero (or bail if not
+ // supported).
+ if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
+ return false;
+
+ APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
EltBits[i] = Bits.getZExtValue();
}
return true;
};
- auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask,
- APInt &Undefs) {
+ // Collect constant bits and insert into mask/undef bit masks.
+ auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
+ unsigned BitOffset) {
if (!Cst)
return false;
unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
if (isa<UndefValue>(Cst)) {
- Mask = APInt::getNullValue(SizeInBits);
- Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits);
+ Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
return true;
}
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
- Mask = CInt->getValue().zextOrTrunc(SizeInBits);
- Undefs = APInt::getNullValue(SizeInBits);
+ Mask.insertBits(CInt->getValue(), BitOffset);
return true;
}
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
- Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
- Undefs = APInt::getNullValue(SizeInBits);
+ Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset);
return true;
}
return false;
};
+ // Extract constant bits from build vector.
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ const SDValue &Src = Op.getOperand(i);
+ unsigned BitOffset = i * SrcEltSizeInBits;
+ if (Src.isUndef()) {
+ UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
+ continue;
+ }
+ auto *Cst = cast<ConstantSDNode>(Src);
+ APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
+ MaskBits.insertBits(Bits, BitOffset);
+ }
+ return SplitBitData();
+ }
+
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
@@ -5268,117 +5467,59 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return false;
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
- for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) {
- APInt Bits, Undefs;
- if (!ExtractConstantBits(Cst->getAggregateElement(i), Bits, Undefs))
+ for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i)
+ if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits,
+ i * CstEltSizeInBits))
return false;
- MaskBits |= Bits.shl(i * CstEltSizeInBits);
- UndefBits |= Undefs.shl(i * CstEltSizeInBits);
- }
return SplitBitData();
}
// Extract constant bits from a broadcasted constant pool scalar.
if (Op.getOpcode() == X86ISD::VBROADCAST &&
- EltSizeInBits <= Op.getScalarValueSizeInBits()) {
+ EltSizeInBits <= SrcEltSizeInBits) {
if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
- APInt Bits, Undefs;
- if (ExtractConstantBits(Broadcast, Bits, Undefs)) {
- unsigned NumBroadcastBits = Op.getScalarValueSizeInBits();
- unsigned NumBroadcastElts = SizeInBits / NumBroadcastBits;
- for (unsigned i = 0; i != NumBroadcastElts; ++i) {
- MaskBits |= Bits.shl(i * NumBroadcastBits);
- UndefBits |= Undefs.shl(i * NumBroadcastBits);
+ APInt Bits(SizeInBits, 0);
+ APInt Undefs(SizeInBits, 0);
+ if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) {
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ MaskBits |= Bits.shl(i * SrcEltSizeInBits);
+ UndefBits |= Undefs.shl(i * SrcEltSizeInBits);
}
return SplitBitData();
}
}
}
+ // Extract a rematerialized scalar constant insertion.
+ if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
+ Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
+ auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
+ MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
+ MaskBits = MaskBits.zext(SizeInBits);
+ return SplitBitData();
+ }
+
return false;
}
-// TODO: Merge more of this with getTargetConstantBitsFromNode.
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask) {
- MaskNode = peekThroughBitcasts(MaskNode);
-
- MVT VT = MaskNode.getSimpleValueType();
- assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
- unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
-
- // Split an APInt element into MaskEltSizeInBits sized pieces and
- // insert into the shuffle mask.
- auto SplitElementToMask = [&](APInt Element) {
- // Note that this is x86 and so always little endian: the low byte is
- // the first byte of the mask.
- int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
- for (int i = 0; i < Split; ++i) {
- APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
- Element = Element.lshr(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- }
- };
-
- if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
- // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0
- if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
- return false;
- if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
- const APInt &MaskElement = CN->getAPIntValue();
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
- APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- }
- }
+ APInt UndefElts;
+ SmallVector<APInt, 64> EltBits;
+
+ // Extract the raw target constant bits.
+ // FIXME: We currently don't support UNDEF bits or mask entries.
+ if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
+ EltBits, /* AllowWholeUndefs */ false,
+ /* AllowPartialUndefs */ false))
return false;
- }
- if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
- MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
- SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
- if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
- if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
- RawMask.push_back(CN->getZExtValue());
- RawMask.append(NumMaskElts - 1, 0);
- return true;
- }
-
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
- unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
- SplitElementToMask(CN->getAPIntValue());
- RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
- return true;
- }
- }
- return false;
- }
-
- if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
- return false;
-
- // We can always decode if the buildvector is all zero constants,
- // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
- if (all_of(MaskNode->ops(), X86::isZeroNode)) {
- RawMask.append(NumMaskElts, 0);
- return true;
- }
-
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
- return false;
-
- for (SDValue Op : MaskNode->ops()) {
- if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
- SplitElementToMask(CN->getAPIntValue());
- else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
- SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
- else
- return false;
- }
+ // Insert the extracted elements into the mask.
+ for (APInt Elt : EltBits)
+ RawMask.push_back(Elt.getZExtValue());
return true;
}
@@ -5405,6 +5546,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::BLENDI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
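+    // A blend of a register with itself is effectively a unary shuffle.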
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
@@ -5473,8 +5615,18 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
IsUnary = true;
break;
case X86ISD::VBROADCAST: {
- // We only decode broadcasts of same-sized vectors at the moment.
- if (N->getOperand(0).getValueType() == VT) {
+ SDValue N0 = N->getOperand(0);
+ // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
+ // add the pre-extracted value to the Ops vector.
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N0.getOperand(0).getValueType() == VT &&
+ N0.getConstantOperandVal(1) == 0)
+ Ops.push_back(N0.getOperand(0));
+
+    // We only decode broadcasts of same-sized vectors, unless the broadcast
+    // came from an extract of the original-width vector. If we found one, we
+    // pushed it onto the Ops vector above.
+ if (N0.getValueType() == VT || !Ops.empty()) {
DecodeVectorBroadcast(VT, Mask);
IsUnary = true;
break;
@@ -5669,6 +5821,19 @@ static bool setTargetShuffleZeroElements(SDValue N,
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
+ assert((VT.getSizeInBits() % Mask.size()) == 0 &&
+ "Illegal split of shuffle value type");
+ unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
+
+ // Extract known constant input data.
+ APInt UndefSrcElts[2];
+ SmallVector<APInt, 32> SrcEltBits[2];
+ bool IsSrcConstant[2] = {
+ getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
+ SrcEltBits[0], true, false),
+ getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
+ SrcEltBits[1], true, false)};
+
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
@@ -5677,6 +5842,7 @@ static bool setTargetShuffleZeroElements(SDValue N,
continue;
// Determine shuffle input and normalize the mask.
+ unsigned SrcIdx = M / Size;
SDValue V = M < Size ? V1 : V2;
M %= Size;
@@ -5686,39 +5852,27 @@ static bool setTargetShuffleZeroElements(SDValue N,
continue;
}
- // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
- if (V.getOpcode() != ISD::BUILD_VECTOR)
- continue;
-
- // If the BUILD_VECTOR has fewer elements then the (larger) source
- // element must be UNDEF/ZERO.
- // TODO: Is it worth testing the individual bits of a constant?
- if ((Size % V.getNumOperands()) == 0) {
- int Scale = Size / V->getNumOperands();
- SDValue Op = V.getOperand(M / Scale);
- if (Op.isUndef())
+ // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
+ // TODO: We currently only set UNDEF for integer types - floats use the same
+ // registers as vectors and many of the scalar folded loads rely on the
+ // SCALAR_TO_VECTOR pattern.
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ (Size % V.getValueType().getVectorNumElements()) == 0) {
+ int Scale = Size / V.getValueType().getVectorNumElements();
+ int Idx = M / Scale;
+ if (Idx != 0 && !VT.isFloatingPoint())
Mask[i] = SM_SentinelUndef;
- else if (X86::isZeroNode(Op))
+ else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
Mask[i] = SM_SentinelZero;
continue;
}
- // If the BUILD_VECTOR has more elements then all the (smaller) source
- // elements must be all UNDEF or all ZERO.
- if ((V.getNumOperands() % Size) == 0) {
- int Scale = V->getNumOperands() / Size;
- bool AllUndef = true;
- bool AllZero = true;
- for (int j = 0; j < Scale; ++j) {
- SDValue Op = V.getOperand((M * Scale) + j);
- AllUndef &= Op.isUndef();
- AllZero &= X86::isZeroNode(Op);
- }
- if (AllUndef)
+ // Attempt to extract from the source's constant bits.
+ if (IsSrcConstant[SrcIdx]) {
+ if (UndefSrcElts[SrcIdx][M])
Mask[i] = SM_SentinelUndef;
- else if (AllZero)
+ else if (SrcEltBits[SrcIdx][M] == 0)
Mask[i] = SM_SentinelZero;
- continue;
}
}
@@ -5744,11 +5898,16 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
unsigned Opcode = N.getOpcode();
switch (Opcode) {
- case ISD::AND: {
+ case ISD::AND:
+ case X86ISD::ANDNP: {
// Attempt to decode as a per-byte mask.
- SmallBitVector UndefElts;
+ APInt UndefElts;
SmallVector<APInt, 32> EltBits;
- if (!getTargetConstantBitsFromNode(N.getOperand(1), 8, UndefElts, EltBits))
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ bool IsAndN = (X86ISD::ANDNP == Opcode);
+ uint64_t ZeroMask = IsAndN ? 255 : 0;
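+    // ANDNP inverts its mask operand, so for it an all-ones mask byte is the
+    // value that zeroes the lane.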
+ if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
return false;
for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
if (UndefElts[i]) {
@@ -5758,9 +5917,55 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
uint64_t ByteBits = EltBits[i].getZExtValue();
if (ByteBits != 0 && ByteBits != 255)
return false;
- Mask.push_back(ByteBits == 0 ? SM_SentinelZero : i);
+ Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
}
- Ops.push_back(N.getOperand(0));
+ Ops.push_back(IsAndN ? N1 : N0);
+ return true;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ // Match against a scalar_to_vector of an extract from a similar vector.
+ SDValue N0 = N.getOperand(0);
+ if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N0.getOperand(0).getValueType() != VT ||
+ !isa<ConstantSDNode>(N0.getOperand(1)) ||
+ NumElts <= N0.getConstantOperandVal(1) ||
+ !N->isOnlyUserOf(N0.getNode()))
+ return false;
+ Ops.push_back(N0.getOperand(0));
+ Mask.push_back(N0.getConstantOperandVal(1));
+ Mask.append(NumElts - 1, SM_SentinelUndef);
+ return true;
+ }
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW: {
+ SDValue InVec = N.getOperand(0);
+ SDValue InScl = N.getOperand(1);
+ uint64_t InIdx = N.getConstantOperandVal(2);
+ assert(InIdx < NumElts && "Illegal insertion index");
+
+ // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
+ if (X86::isZeroNode(InScl)) {
+ Ops.push_back(InVec);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
+ return true;
+ }
+
+ // Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern.
+ // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
+ unsigned ExOp =
+ (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
+ if (InScl.getOpcode() != ISD::AssertZext ||
+ InScl.getOperand(0).getOpcode() != ExOp)
+ return false;
+
+ SDValue ExVec = InScl.getOperand(0).getOperand(0);
+ uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1);
+ assert(ExIdx < NumElts && "Illegal extraction index");
+ Ops.push_back(InVec);
+ Ops.push_back(ExVec);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
return true;
}
case X86ISD::VSHLI:
@@ -5795,6 +6000,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
}
return true;
}
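+  // ISD::ZERO_EXTEND_VECTOR_INREG is decoded with the same logic as
+  // X86ISD::VZEXT below.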
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VZEXT: {
// TODO - add support for VPMOVZX with smaller input vector types.
SDValue Src = N.getOperand(0);
@@ -5810,36 +6016,38 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return false;
}
+/// Removes unused shuffle source inputs and adjusts the shuffle mask
+/// accordingly.
+static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask) {
+ int MaskWidth = Mask.size();
+ SmallVector<SDValue, 16> UsedInputs;
+ for (int i = 0, e = Inputs.size(); i < e; ++i) {
+ int lo = UsedInputs.size() * MaskWidth;
+ int hi = lo + MaskWidth;
+ if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
+ UsedInputs.push_back(Inputs[i]);
+ continue;
+ }
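+    // This input is unused - shift down any mask indices that referenced
+    // later inputs.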
+ for (int &M : Mask)
+ if (lo <= M)
+ M -= MaskWidth;
+ }
+ Inputs = UsedInputs;
+}
+
/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
/// remaining input indices in case we now have a unary shuffle and adjust the
-/// Op0/Op1 inputs accordingly.
+/// inputs accordingly.
/// Returns true if the target shuffle mask was decoded.
-static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1,
+static bool resolveTargetShuffleInputs(SDValue Op,
+ SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask) {
- SmallVector<SDValue, 2> Ops;
- if (!setTargetShuffleZeroElements(Op, Mask, Ops))
- if (!getFauxShuffleMask(Op, Mask, Ops))
+ if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
+ if (!getFauxShuffleMask(Op, Mask, Inputs))
return false;
- int NumElts = Mask.size();
- bool Op0InUse = any_of(Mask, [NumElts](int Idx) {
- return 0 <= Idx && Idx < NumElts;
- });
- bool Op1InUse = any_of(Mask, [NumElts](int Idx) { return NumElts <= Idx; });
-
- Op0 = Op0InUse ? Ops[0] : SDValue();
- Op1 = Op1InUse ? Ops[1] : SDValue();
-
- // We're only using Op1 - commute the mask and inputs.
- if (!Op0InUse && Op1InUse) {
- for (int &M : Mask)
- if (NumElts <= M)
- M -= NumElts;
- Op0 = Op1;
- Op1 = SDValue();
- }
-
+ resolveTargetShuffleInputsAndMask(Inputs, Mask);
return true;
}
@@ -5914,10 +6122,9 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
- unsigned NumNonZero, unsigned NumZero,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
if (NumNonZero > 8)
return SDValue();
@@ -5928,18 +6135,26 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41()) {
for (unsigned i = 0; i < 16; ++i) {
- bool isNonZero = (NonZeros & (1 << i)) != 0;
- if (isNonZero) {
+ bool IsNonZero = (NonZeros & (1 << i)) != 0;
+ if (IsNonZero) {
+      // If the build vector contains zeros or our first insertion is not the
+      // first index, then insert into a zero vector to break any register
+      // dependency; otherwise use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
- if (NumZero)
- V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
- else
- V = DAG.getUNDEF(MVT::v16i8);
First = false;
+ if (NumZero || 0 != i)
+ V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
+ else {
+ assert(0 == i && "Expected insertion into zero-index");
+ V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v16i8, V);
+ continue;
+ }
}
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
- MVT::v16i8, V, Op.getOperand(i),
- DAG.getIntPtrConstant(i, dl));
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
+ Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
@@ -5958,24 +6173,35 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
if ((i & 1) != 0) {
+ // FIXME: Investigate extending to i32 instead of just i16.
+ // FIXME: Investigate combining the first 4 bytes as a i32 instead.
SDValue ThisElt, LastElt;
- bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
if (LastIsNonZero) {
- LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
- MVT::i16, Op.getOperand(i-1));
+ LastElt =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
- ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
- ThisElt, DAG.getConstant(8, dl, MVT::i8));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
+ DAG.getConstant(8, dl, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
- if (ThisElt.getNode())
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
- DAG.getIntPtrConstant(i/2, dl));
+ if (ThisElt) {
+ if (1 == i) {
+ V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
+ : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ } else {
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i / 2, dl));
+ }
+ }
}
}
@@ -5986,8 +6212,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
+ const X86Subtarget &Subtarget) {
if (NumNonZero > 4)
return SDValue();
@@ -5995,18 +6220,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
SDValue V;
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
- bool isNonZero = (NonZeros & (1 << i)) != 0;
- if (isNonZero) {
+ bool IsNonZero = (NonZeros & (1 << i)) != 0;
+ if (IsNonZero) {
+      // If the build vector contains zeros or our first insertion is not the
+      // first index, then insert into a zero vector to break any register
+      // dependency; otherwise use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
- if (NumZero)
- V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
- else
- V = DAG.getUNDEF(MVT::v8i16);
First = false;
+ if (NumZero || 0 != i)
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ else {
+ assert(0 == i && "Expected insertion into zero-index");
+ V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ continue;
+ }
}
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
- MVT::v8i16, V, Op.getOperand(i),
- DAG.getIntPtrConstant(i, dl));
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
+ Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
@@ -6015,8 +6248,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
+ const X86Subtarget &Subtarget) {
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
@@ -6212,7 +6444,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
///
/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
- SDLoc &DL, SelectionDAG &DAG,
+ const SDLoc &DL, SelectionDAG &DAG,
bool isAfterLegalize) {
unsigned NumElems = Elts.size();
@@ -6376,14 +6608,14 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
return SDValue();
}
-static Constant *getConstantVector(MVT VT, APInt SplatValue,
+static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
unsigned NumElm = SplatBitSize / ScalarSize;
SmallVector<Constant *, 32> ConstantVec;
for (unsigned i = 0; i < NumElm; i++) {
- APInt Val = SplatValue.lshr(ScalarSize * i).trunc(ScalarSize);
+ APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
assert((ScalarSize == 32 || ScalarSize == 64) &&
@@ -6664,6 +6896,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
+
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
@@ -6694,11 +6927,10 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
- for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
- unsigned Idx = InsertIndices[i];
+
+ for (unsigned Idx : InsertIndices)
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
DAG.getIntPtrConstant(Idx, DL));
- }
return NV;
}
@@ -7347,7 +7579,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
(VT == MVT::v8i32 && Subtarget.hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget, DAG, DL);
+ return getOnesVector(VT, DAG, DL);
}
return SDValue();
@@ -7418,7 +7650,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// a constant pool load than it is to do a movd + shuffle.
if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
(!IsAllConstants || Idx == 0)) {
- if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
MVT VecVT = MVT::v4i32;
@@ -7561,17 +7793,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
- DAG, Subtarget, *this))
+ DAG, Subtarget))
return V;
if (EVTBits == 16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
- DAG, Subtarget, *this))
+ DAG, Subtarget))
return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
if (EVTBits == 32 && NumElems == 4)
- if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this))
+ if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget))
return V;
// If element VT is == 32 bits, turn it into a number of shuffles.
@@ -7767,7 +7999,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl);
if (V1.isUndef())
- V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
if (IsZeroV1)
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal);
@@ -7956,7 +8188,7 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
ExpectedBV->getOperand(ExpectedMask[i] % Size))
return false;
}
-}
+ }
return true;
}
@@ -7986,6 +8218,41 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
return true;
}
+// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
+// mask.
+static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
+ const APInt &Zeroable) {
+ int NumElts = Mask.size();
+ assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");
+
+ SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
+ TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
+ }
+ return TargetMask;
+}
+
+// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
+// instructions.
+static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
+ if (VT != MVT::v8i32 && VT != MVT::v8f32)
+ return false;
+
+ SmallVector<int, 8> Unpcklwd;
+ createUnpackShuffleMask(MVT::v8i16, Unpcklwd, /* Lo = */ true,
+ /* Unary = */ false);
+ SmallVector<int, 8> Unpckhwd;
+ createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
+ /* Unary = */ false);
+ bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) ||
+ isTargetShuffleEquivalent(Mask, Unpckhwd));
+ return IsUnpackwdMask;
+}
+
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -8009,7 +8276,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
return Imm;
}
-static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
+static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
SelectionDAG &DAG) {
return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
@@ -8022,9 +8289,9 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
-static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2) {
- SmallBitVector Zeroable(Mask.size(), false);
+static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2) {
+ APInt Zeroable(Mask.size(), 0);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
@@ -8039,7 +8306,7 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
int M = Mask[i];
// Handle the easy cases.
if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
- Zeroable[i] = true;
+ Zeroable.setBit(i);
continue;
}
@@ -8057,17 +8324,19 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef() || X86::isZeroNode(Op))
- Zeroable[i] = true;
+ Zeroable.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
Val = Val.lshr((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
- Zeroable[i] = (Val == 0);
+ if (Val == 0)
+ Zeroable.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val = Val.lshr((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
- Zeroable[i] = (Val == 0);
+ if (Val == 0)
+ Zeroable.setBit(i);
}
continue;
}
@@ -8081,7 +8350,8 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue Op = V.getOperand((M * Scale) + j);
AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
}
- Zeroable[i] = AllZeroable;
+ if (AllZeroable)
+ Zeroable.setBit(i);
continue;
}
}
@@ -8096,19 +8366,20 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
//
// The function looks for a sub-mask that the nonzero elements are in
// increasing order. If such sub-mask exist. The function returns true.
-static bool isNonZeroElementsInOrder(const SmallBitVector Zeroable,
- ArrayRef<int> Mask,const EVT &VectorType,
+static bool isNonZeroElementsInOrder(const APInt &Zeroable,
+ ArrayRef<int> Mask, const EVT &VectorType,
bool &IsZeroSideLeft) {
int NextElement = -1;
// Check if the Mask's nonzero elements are in increasing order.
- for (int i = 0, e = Zeroable.size(); i < e; i++) {
+ for (int i = 0, e = Mask.size(); i < e; i++) {
// Checks if the mask's zeros elements are built from only zeros.
- if (Mask[i] == -1)
+ assert(Mask[i] >= -1 && "Out of bound mask element!");
+ if (Mask[i] < 0)
return false;
if (Zeroable[i])
continue;
// Find the lowest non zero element
- if (NextElement == -1) {
+ if (NextElement < 0) {
NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
IsZeroSideLeft = NextElement != 0;
}
@@ -8124,7 +8395,7 @@ static bool isNonZeroElementsInOrder(const SmallBitVector Zeroable,
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
@@ -8179,19 +8450,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl);
-// Function convertBitVectorToUnsigned - The function gets SmallBitVector
-// as argument and convert him to unsigned.
-// The output of the function is not(zeroable)
-static unsigned convertBitVectorToUnsiged(const SmallBitVector &Zeroable) {
- unsigned convertBit = 0;
- for (int i = 0, e = Zeroable.size(); i < e; i++)
- convertBit |= !(Zeroable[i]) << i;
- return convertBit;
-}
-
// X86 has dedicated shuffle that can be lowered to VEXPAND
static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
ArrayRef<int> Mask, SDValue &V1,
SDValue &V2, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -8199,7 +8460,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
IsLeftZeroSide))
return SDValue();
- unsigned VEXPANDMask = convertBitVectorToUnsiged(Zeroable);
+ unsigned VEXPANDMask = (~Zeroable).getZExtValue();
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
@@ -8215,6 +8476,91 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
ZeroVector);
}
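+// Attempt to match a shuffle mask against the unpack lo/hi mask patterns,
+// returning the matched UNPCKL/UNPCKH opcode and (possibly commuted or
+// zeroed) operands.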
+static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
+ unsigned &UnpackOpcode, bool IsUnary,
+ ArrayRef<int> TargetMask, SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+
+ bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
+ for (int i = 0; i != NumElts; i += 2) {
+ int M1 = TargetMask[i + 0];
+ int M2 = TargetMask[i + 1];
+ Undef1 &= (SM_SentinelUndef == M1);
+ Undef2 &= (SM_SentinelUndef == M2);
+ Zero1 &= isUndefOrZero(M1);
+ Zero2 &= isUndefOrZero(M2);
+ }
+ assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
+ "Zeroable shuffle detected");
+
+ // Attempt to match the target mask against the unpack lo/hi mask patterns.
+ SmallVector<int, 64> Unpckl, Unpckh;
+ createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
+ UnpackOpcode = X86ISD::UNPCKL;
+ V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
+ V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
+ return true;
+ }
+
+ createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
+ UnpackOpcode = X86ISD::UNPCKH;
+ V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
+ V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
+ return true;
+ }
+
+ // If an unary shuffle, attempt to match as an unpack lo/hi with zero.
+ if (IsUnary && (Zero1 || Zero2)) {
+ // Don't bother if we can blend instead.
+ if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
+ isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
+ return false;
+
+ bool MatchLo = true, MatchHi = true;
+ for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
+ int M = TargetMask[i];
+
+ // Ignore if the input is known to be zero or the index is undef.
+ if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
+ (M == SM_SentinelUndef))
+ continue;
+
+ MatchLo &= (M == Unpckl[i]);
+ MatchHi &= (M == Unpckh[i]);
+ }
+
+ if (MatchLo || MatchHi) {
+ UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
+ V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
+ return true;
+ }
+ }
+
+ // If a binary shuffle, commute and try again.
+ if (!IsUnary) {
+ ShuffleVectorSDNode::commuteMask(Unpckl);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
+ UnpackOpcode = X86ISD::UNPCKL;
+ std::swap(V1, V2);
+ return true;
+ }
+
+ ShuffleVectorSDNode::commuteMask(Unpckh);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
+ UnpackOpcode = X86ISD::UNPCKH;
+ std::swap(V1, V2);
+ return true;
+ }
+ }
+
+ return false;
+}
+
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
@@ -8248,13 +8594,12 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
/// one of the inputs being zeroable.
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() && "Floating point types are not supported");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
- SDValue AllOnes =
- DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, EltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
@@ -8286,10 +8631,8 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
MVT EltVT = VT.getVectorElementType();
- int NumEltBits = EltVT.getSizeInBits();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
- SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
- EltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> MaskOps;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size)
@@ -8307,51 +8650,81 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
-/// \brief Try to emit a blend instruction for a shuffle.
-///
-/// This doesn't do any checks for the availability of instructions for blending
-/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
-/// be matched in the backend with the type given. What it does check for is
-/// that the shuffle mask is a blend, or convertible into a blend with zero.
-static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Original,
- const SmallBitVector &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
- bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
- SmallVector<int, 8> Mask(Original.begin(), Original.end());
- bool ForceV1Zero = false, ForceV2Zero = false;
+static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
+ SDValue PreservedSrc,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG);
+
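+// Attempt to express the target shuffle mask as an immediate blend of V1/V2,
+// treating zeroable lanes as matchable if the corresponding input can be
+// forced to zero.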
+static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
+ MutableArrayRef<int> TargetMask,
+ bool &ForceV1Zero, bool &ForceV2Zero,
+ uint64_t &BlendMask) {
+ bool V1IsZeroOrUndef =
+ V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZeroOrUndef =
+ V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
+
+ BlendMask = 0;
+ ForceV1Zero = false, ForceV2Zero = false;
+ assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
// TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
- unsigned BlendMask = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- int M = Mask[i];
- if (M < 0)
+ for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
+ int M = TargetMask[i];
+ if (M == SM_SentinelUndef)
continue;
if (M == i)
continue;
if (M == i + Size) {
- BlendMask |= 1u << i;
+ BlendMask |= 1ull << i;
continue;
}
- if (Zeroable[i]) {
- if (V1IsZero) {
+ if (M == SM_SentinelZero) {
+ if (V1IsZeroOrUndef) {
ForceV1Zero = true;
- Mask[i] = i;
+ TargetMask[i] = i;
continue;
}
- if (V2IsZero) {
+ if (V2IsZeroOrUndef) {
ForceV2Zero = true;
- BlendMask |= 1u << i;
- Mask[i] = i + Size;
+ BlendMask |= 1ull << i;
+ TargetMask[i] = i + Size;
continue;
}
}
- return SDValue(); // Shuffled input!
+ return false;
}
+ return true;
+}
+
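+// Scale a blend mask so that each original mask bit covers Scale consecutive
+// bits of the wider mask.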
+static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
+                                            int Scale) {
+ uint64_t ScaledMask = 0;
+ for (int i = 0; i != Size; ++i)
+ if (BlendMask & (1ull << i))
+ ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
+ return ScaledMask;
+}
+
+/// \brief Try to emit a blend instruction for a shuffle.
+///
+/// This doesn't do any checks for the availability of instructions for blending
+/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
+/// be matched in the backend with the type given. What it does check for is
+/// that the shuffle mask is a blend, or convertible into a blend with zero.
+static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Original,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
+
+ uint64_t BlendMask = 0;
+ bool ForceV1Zero = false, ForceV2Zero = false;
+ if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
+ BlendMask))
+ return SDValue();
// Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
if (ForceV1Zero)
@@ -8359,15 +8732,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
- auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) {
- unsigned ScaledMask = 0;
- for (int i = 0; i != Size; ++i)
- if (BlendMask & (1u << i))
- for (int j = 0; j != Scale; ++j)
- ScaledMask |= 1u << (i * Scale + j);
- return ScaledMask;
- };
-
switch (VT.SimpleTy) {
case MVT::v2f64:
case MVT::v4f32:
@@ -8387,7 +8751,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (Subtarget.hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
- BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
+ BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
@@ -8400,7 +8764,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
int Scale = 8 / VT.getVectorNumElements();
- BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
+ BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(VT,
@@ -8417,7 +8781,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
BlendMask = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
- BlendMask |= 1u << i;
+ BlendMask |= 1ull << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
@@ -8428,6 +8792,13 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
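+    // AVX512BW/VL targets can lower the byte blend directly as a masked move.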
+ MVT IntegerType =
+ MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
+ return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
+ }
+
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked =
lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
@@ -8465,7 +8836,17 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
VT, DAG.getNode(ISD::VSELECT, DL, BlendVT,
DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2));
}
-
+ case MVT::v16f32:
+ case MVT::v8f64:
+ case MVT::v8i64:
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8: {
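+    // 512-bit blends are lowered with a mask-register select.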
+ MVT IntegerType =
+ MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
+ return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
+ }
default:
llvm_unreachable("Not a supported integer vector type!");
}
@@ -8503,7 +8884,7 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
-/// \brief Generic routine to decompose a shuffle and blend into indepndent
+/// \brief Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
/// This matches the extremely common pattern for handling combined
@@ -8757,7 +9138,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
unsigned ScalarSizeInBits,
ArrayRef<int> Mask, int MaskOffset,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget) {
int Size = Mask.size();
unsigned SizeInBits = Size * ScalarSizeInBits;
@@ -8819,7 +9200,7 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
@@ -8855,12 +9236,12 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SelectionDAG &DAG) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
- assert(!Zeroable.all() && "Fully zeroable shuffle mask");
+ assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
@@ -8987,7 +9368,7 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extended elements are consecutive and
-/// begin and can start from an offseted element index in the input; to
+/// begin and can start from an offset element index in the input; to
/// avoid excess shuffling the offset must either being in the bottom lane
/// or at the start of a higher lane. All extended elements must be from
/// the same lane.
@@ -9027,21 +9408,14 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget.hasSSE41()) {
- // Not worth offseting 128-bit vectors if scale == 2, a pattern using
+ // Not worth offsetting 128-bit vectors if scale == 2, a pattern using
// PUNPCK will catch this in a later shuffle match.
if (Offset && Scale == 2 && VT.is128BitVector())
return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
-
- // For 256-bit vectors, we only need the lower (128-bit) input half.
- // For 512-bit vectors, we only need the lower input half or quarter.
- if (VT.getSizeInBits() > 128)
- InputV = extractSubVector(InputV, 0, DAG, DL,
- std::max(128, (int)VT.getSizeInBits() / Scale));
-
- InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
+ InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -9158,7 +9532,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+ const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
@@ -9314,7 +9688,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
/// across all subtarget feature sets.
static SDValue lowerVectorShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+ const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
@@ -9612,7 +9986,16 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
if (((BroadcastIdx * EltSize) % 128) != 0)
return SDValue();
- MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize);
+ // The shuffle input might have been a bitcast we looked through; look at
+ // the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll
+ // later bitcast it to BroadcastVT.
+ MVT SrcVT = V.getSimpleValueType();
+ assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
+ "Unexpected vector element size");
+ assert((SrcVT.is256BitVector() || SrcVT.is512BitVector()) &&
+ "Unexpected vector size");
+
+ MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V,
DAG.getIntPtrConstant(BroadcastIdx, DL));
}
@@ -9642,6 +10025,12 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
}
+ // We only support broadcasting from 128-bit vectors to minimize the
+ // number of patterns we need to deal with in isel. So extract down to
+ // 128-bits.
+ if (SrcVT.getSizeInBits() > 128)
+ V = extract128BitVector(V, 0, DAG, DL);
+
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
@@ -9653,7 +10042,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// elements are zeroable.
static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
unsigned &InsertPSMask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
@@ -9742,7 +10131,7 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
@@ -9877,7 +10266,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -9959,7 +10348,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -10178,7 +10567,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -10261,7 +10650,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -10353,7 +10742,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
- // up the inputs, bypassing domain shift penalties that we would encur if we
+ // up the inputs, bypassing domain shift penalties that we would incur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
@@ -10384,18 +10773,16 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
- assert(Mask.size() == 8 && "Shuffle mask length doen't match!");
+ assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
SmallVector<int, 4> LoInputs;
- std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs),
- [](int M) { return M >= 0; });
+ copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
SmallVector<int, 4> HiInputs;
- std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs),
- [](int M) { return M >= 0; });
+ copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
int NumLToL =
@@ -10574,7 +10961,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
- else if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
+ if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
// At this point there are at most two inputs to the low and high halves from
@@ -10830,7 +11217,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
/// blend if only one input is used.
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable, SelectionDAG &DAG, bool &V1InUse,
+ const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
bool &V2InUse) {
SDValue V1Mask[16];
SDValue V2Mask[16];
@@ -10891,7 +11278,7 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11075,7 +11462,7 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11132,14 +11519,13 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (!canWidenViaDuplication(Mask))
return SDValue();
SmallVector<int, 4> LoInputs;
- std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs),
- [](int M) { return M >= 0 && M < 8; });
+ copy_if(Mask, std::back_inserter(LoInputs),
+ [](int M) { return M >= 0 && M < 8; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
LoInputs.end());
SmallVector<int, 4> HiInputs;
- std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs),
- [](int M) { return M >= 8; });
+ copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
HiInputs.end());
@@ -11193,7 +11579,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
PostDupI16Shuffle[i / 2] = MappedMask;
else
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
- "Conflicting entrties in the original shuffle!");
+ "Conflicting entries in the original shuffle!");
}
return DAG.getBitcast(
MVT::v16i8,
@@ -11365,7 +11751,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
switch (VT.SimpleTy) {
@@ -11621,7 +12007,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 4> WidenedMask;
@@ -12091,7 +12477,7 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
unsigned &ShuffleImm,
ArrayRef<int> Mask) {
int NumElts = VT.getVectorNumElements();
- assert(VT.getScalarType() == MVT::f64 &&
+ assert(VT.getScalarSizeInBits() == 64 &&
(NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected data type for VSHUFPD");
@@ -12127,6 +12513,9 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
+  assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
+ "Unexpected data type for VSHUFPD");
+
unsigned Immediate = 0;
if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
return SDValue();
@@ -12153,7 +12542,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12250,7 +12639,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling..
static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12338,7 +12727,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12414,6 +12803,14 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V1, V2, DAG, Subtarget))
return V;
+  // For non-AVX512 targets, if the mask matches a 16-bit in-lane unpack
+  // pattern then try to split, since after the split we get more efficient
+  // code using vpunpcklwd and vpunpckhwd instead of vblend.
+ if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
+ if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
+ Mask, DAG))
+ return V;
+
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
@@ -12429,7 +12826,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling..
static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12445,6 +12842,15 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
+ // For non-AVX512, if the mask consists of 16-bit elements within each lane,
+ // try to split, since after the split we get more efficient code than vblend
+ // by using the vpunpcklwd and vpunpckhwd instructions.
+ if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
+ !Subtarget.hasAVX512())
+ if (SDValue V =
+ lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
+ return V;
+
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
@@ -12533,7 +12939,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling..
static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12619,7 +13025,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling..
static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12692,7 +13098,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// together based on the available instructions.
static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// If we have a single input to the zero element, insert that into V1 if we
@@ -12844,7 +13250,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12891,12 +13297,16 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V2, DAG, Subtarget))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
-static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12925,6 +13335,10 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
return Unpck;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
// Otherwise, fall back to a SHUFPS sequence.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
@@ -12938,7 +13352,7 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12994,12 +13408,16 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V2, DAG, Subtarget))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -13062,12 +13480,15 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V1, V2, DAG, Subtarget))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -13109,12 +13530,16 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
}
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -13159,6 +13584,10 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
@@ -13170,7 +13599,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// together based on the available instructions.
static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
@@ -13251,7 +13680,7 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (ISD::isBuildVectorAllZeros(V1.getNode()))
V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V1.getNode()))
- V1 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ V1 = getOnesVector(ExtVT, DAG, DL);
else
V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
@@ -13260,7 +13689,7 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
else if (ISD::isBuildVectorAllZeros(V2.getNode()))
V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V2.getNode()))
- V2 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ V2 = getOnesVector(ExtVT, DAG, DL);
else
V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
@@ -13392,8 +13821,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- if (Zeroable.all())
+ APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
// Try to collapse shuffles into using a vector type with fewer elements but
@@ -13569,10 +13998,14 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
"Unexpected vector type in ExtractBitFromMaskVector");
// variable index can't be handled in mask registers,
- // extend vector to VR512
+ // extend vector to VR512/128
if (!isa<ConstantSDNode>(Idx)) {
- MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
- SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ // Extending v8i1/v16i1 to 512-bit gets better performance on KNL
+ // than extending to 128/256-bit.
+ unsigned VecSize = (NumElts <= 4 ? 128 : 512);
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
ExtVT.getVectorElementType(), Ext, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
@@ -13590,9 +14023,9 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
}
unsigned MaxSift = VecVT.getVectorNumElements() - 1;
if (MaxSift - IdxVal)
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(MaxSift, dl, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
DAG.getIntPtrConstant(0, dl));
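As a side note on the hunk above: the KSHIFTL/KSHIFTR pair isolates bit IdxVal of a mask register by shifting it up to the top bit and then logically shifting it back down to bit 0. A minimal standalone sketch of the same arithmetic on a plain 16-bit mask (the helper name and the fixed 16-element width are illustrative, not the DAG API):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the KSHIFTL/KSHIFTR sequence: move the wanted bit to the
// MSB, then logically shift it back to bit 0, which also clears everything
// above it.
static unsigned extractMaskBit(uint16_t Mask, unsigned IdxVal) {
  const unsigned MaxShift = 15; // NumElems - 1
  uint16_t V = Mask;
  if (MaxShift - IdxVal)
    V = static_cast<uint16_t>(V << (MaxShift - IdxVal)); // KSHIFTL
  V = static_cast<uint16_t>(V >> MaxShift);              // KSHIFTR
  return V;                                              // bit 0 holds the result
}

int main() {
  uint16_t Mask = 0b0000'0010'0100'1000; // bits 3, 6 and 9 set
  assert(extractMaskBit(Mask, 3) == 1);
  assert(extractMaskBit(Mask, 4) == 0);
  assert(extractMaskBit(Mask, 9) == 1);
  return 0;
}
```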
@@ -13610,24 +14043,36 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return ExtractBitFromMaskVector(Op, DAG);
if (!isa<ConstantSDNode>(Idx)) {
- if (VecVT.is512BitVector() ||
- (VecVT.is256BitVector() && Subtarget.hasInt256() &&
- VecVT.getScalarSizeInBits() == 32)) {
-
- MVT MaskEltVT =
- MVT::getIntegerVT(VecVT.getScalarSizeInBits());
- MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
- MaskEltVT.getSizeInBits());
+ // It's more profitable to go through memory (1 cycle throughput)
+ // than to use a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput).
+ // IACA tool was used to get performance estimation
+ // (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer)
+ //
+ // Example: extractelement <16 x i8> %a, i32 %i
+ //
+ // Block Throughput: 3.00 Cycles
+ // Throughput Bottleneck: Port5
+ //
+ // | Num Of | Ports pressure in cycles | |
+ // | Uops | 0 - DV | 5 | 6 | 7 | |
+ // ---------------------------------------------
+ // | 1 | | 1.0 | | | CP | vmovd xmm1, edi
+ // | 1 | | 1.0 | | | CP | vpshufb xmm0, xmm0, xmm1
+ // | 2 | 1.0 | 1.0 | | | CP | vpextrb eax, xmm0, 0x0
+ // Total Num Of Uops: 4
+ //
+ //
+ // Block Throughput: 1.00 Cycles
+ // Throughput Bottleneck: PORT2_AGU, PORT3_AGU, Port4
+ //
+ // | | Ports pressure in cycles | |
+ // |Uops| 1 | 2 - D |3 - D | 4 | 5 | |
+ // ---------------------------------------------------------
+ // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0
+ // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18]
+ // |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1]
+ // Total Num Of Uops: 4
- Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
- getZeroVector(MaskVT, Subtarget, DAG, dl), Idx,
- DAG.getConstant(0, dl, PtrVT));
- SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm,
- DAG.getConstant(0, dl, PtrVT));
- }
return SDValue();
}
@@ -13675,7 +14120,33 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
- // TODO: handle v16i8.
+ // TODO: We only extract a single element from v16i8; we can probably afford
+ // to be more aggressive here before falling back to the default approach of
+ // spilling to the stack.
+ if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
+ // Extract either the lowest i32 or any i16, and extract the sub-byte.
+ int DWordIdx = IdxVal / 4;
+ if (DWordIdx == 0) {
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4i32, Vec),
+ DAG.getIntPtrConstant(DWordIdx, dl));
+ int ShiftVal = (IdxVal % 4) * 8;
+ if (ShiftVal != 0)
+ Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
+ DAG.getConstant(ShiftVal, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
+
+ int WordIdx = IdxVal / 2;
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ DAG.getBitcast(MVT::v8i16, Vec),
+ DAG.getIntPtrConstant(WordIdx, dl));
+ int ShiftVal = (IdxVal % 2) * 8;
+ if (ShiftVal != 0)
+ Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
+ DAG.getConstant(ShiftVal, dl, MVT::i16));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
if (VT.getSizeInBits() == 32) {
if (IdxVal == 0)
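The new v16i8 path above avoids the stack spill by extracting the containing i32 (for bytes 0-3) or the containing i16, shifting the wanted byte down, and truncating. A minimal scalar sketch of that arithmetic, assuming little-endian byte order as on x86 (the helper name is illustrative, not LLVM code):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Scalar model: byte IdxVal of a 16 x i8 vector is read either out of the low
// i32 (when IdxVal < 4) or out of the containing i16, then shifted down.
static uint8_t extractByte(const uint8_t Vec[16], unsigned IdxVal) {
  if (IdxVal / 4 == 0) {
    uint32_t DWord;
    std::memcpy(&DWord, Vec, sizeof(DWord)); // extract the low i32
    return static_cast<uint8_t>(DWord >> ((IdxVal % 4) * 8));
  }
  uint16_t Word;
  std::memcpy(&Word, Vec + (IdxVal / 2) * 2, sizeof(Word)); // extract an i16
  return static_cast<uint8_t>(Word >> ((IdxVal % 2) * 8));
}

int main() {
  uint8_t Vec[16];
  for (unsigned i = 0; i != 16; ++i)
    Vec[i] = static_cast<uint8_t>(0x10 + i);
  for (unsigned i = 0; i != 16; ++i)
    assert(extractByte(Vec, i) == Vec[i]);
  return 0;
}
```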
@@ -13734,7 +14205,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
if(Vec.isUndef()) {
if (IdxVal)
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
return EltInVec;
}
@@ -13744,21 +14215,21 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
if (IdxVal == 0 ) {
// EltInVec already at correct index and other bits are 0.
// Clean the first bit in source vector.
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
if (IdxVal == NumElems -1) {
// Move the bit to the last position inside the vector.
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
// Clean the last bit in the source vector.
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
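For reference, the KSHIFT-based insertion above clears the target mask bit with a shift pair and then ORs in the new bit; only index 0 and index NumElems-1 are handled this way. A minimal sketch of the same trick on a 16-bit scalar mask (illustrative names, not the DAG API):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the bit insertion for a 16-element mask: clear the target
// bit with a shift pair, then OR in the new bit.
static uint16_t insertMaskBit(uint16_t Vec, unsigned Bit, unsigned IdxVal) {
  const unsigned NumElems = 16;
  uint16_t EltInVec = static_cast<uint16_t>(Bit << IdxVal);
  if (IdxVal == 0) {
    // Clear bit 0: shift right one, then left one (KSHIFTR + KSHIFTL).
    Vec = static_cast<uint16_t>(Vec >> 1);
    Vec = static_cast<uint16_t>(Vec << 1);
    return Vec | EltInVec;
  }
  assert(IdxVal == NumElems - 1 && "only the two special cases are modeled");
  // Clear the top bit: shift left one, then right one (KSHIFTL + KSHIFTR).
  Vec = static_cast<uint16_t>(Vec << 1);
  Vec = static_cast<uint16_t>(Vec >> 1);
  return Vec | EltInVec;
}

int main() {
  assert(insertMaskBit(0xFFFF, 0, 0) == 0xFFFE);
  assert(insertMaskBit(0x0000, 1, 15) == 0x8000);
  assert(insertMaskBit(0xFFFF, 0, 15) == 0x7FFF);
  return 0;
}
```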
@@ -13790,17 +14261,21 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
auto *N2C = cast<ConstantSDNode>(N2);
unsigned IdxVal = N2C->getZExtValue();
- // If we are clearing out a element, we do this more efficiently with a
- // blend shuffle than a costly integer insertion.
- // TODO: would other rematerializable values (e.g. allbits) benefit as well?
+ bool IsZeroElt = X86::isZeroNode(N1);
+ bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
+
+ // If we are inserting an element, see if we can do this more efficiently with
+ // a blend shuffle with a rematerializable vector than a costly integer
+ // insertion.
// TODO: pre-SSE41 targets will tend to use bit masking - this could still
// be beneficial if we are inserting several zeros and can combine the masks.
- if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) {
- SmallVector<int, 8> ClearMask;
+ if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) {
+ SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
- ClearMask.push_back(i == IdxVal ? i + NumElts : i);
- SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask);
+ BlendMask.push_back(i == IdxVal ? i + NumElts : i);
+ SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
+ : DAG.getConstant(-1, dl, VT);
+ return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
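The rewritten block above turns "insert a zero or all-ones element" into a shuffle of the source with a rematerializable constant vector, where only lane IdxVal picks from the constant operand (shuffle indices >= NumElts select the second vector). A small scalar model of that blend mask, using 4 x i32 for brevity (all names are illustrative):

```cpp
#include <array>
#include <cassert>
#include <cstdint>

// Scalar model: build the blend mask (i == IdxVal ? i + NumElts : i) and apply
// it to (V1, splat(CstElt)), mirroring getVectorShuffle semantics.
static std::array<int32_t, 4> insertConstElt(std::array<int32_t, 4> V1,
                                             int32_t CstElt, unsigned IdxVal) {
  const unsigned NumElts = 4;
  std::array<int32_t, 4> V2;
  V2.fill(CstElt); // zero vector or all-ones constant
  std::array<int32_t, 4> Result{};
  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned MaskElt = (i == IdxVal) ? i + NumElts : i; // the blend mask
    Result[i] = MaskElt < NumElts ? V1[MaskElt] : V2[MaskElt - NumElts];
  }
  return Result;
}

int main() {
  std::array<int32_t, 4> V = {10, 20, 30, 40};
  assert((insertConstElt(V, 0, 2) == std::array<int32_t, 4>{10, 20, 0, 40}));
  assert((insertConstElt(V, -1, 0) == std::array<int32_t, 4>{-1, 20, 30, 40}));
  return 0;
}
```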
@@ -13837,25 +14312,27 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
- if (Subtarget.hasSSE41()) {
- if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) {
- unsigned Opc;
- if (VT == MVT::v8i16) {
- Opc = X86ISD::PINSRW;
- } else {
- assert(VT == MVT::v16i8);
- Opc = X86ISD::PINSRB;
- }
-
- // Transform it so it match pinsr{b,w} which expects a GR32 as its second
- // argument.
- if (N1.getValueType() != MVT::i32)
- N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
- if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(IdxVal, dl);
- return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
+ // argument. SSE41 is required for pinsrb.
+ if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
+ unsigned Opc;
+ if (VT == MVT::v8i16) {
+ assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW");
+ Opc = X86ISD::PINSRW;
+ } else {
+ assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
+ assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB");
+ Opc = X86ISD::PINSRB;
}
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(IdxVal, dl);
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ }
+
+ if (Subtarget.hasSSE41()) {
if (EltVT == MVT::f32) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into
@@ -13885,36 +14362,29 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
}
- if (EltVT == MVT::i32 || EltVT == MVT::i64) {
- // PINSR* works with constant index.
+ // PINSR* works with constant index.
+ if (EltVT == MVT::i32 || EltVT == MVT::i64)
return Op;
- }
}
- if (EltVT == MVT::i8)
- return SDValue();
-
- if (EltVT.getSizeInBits() == 16) {
- // Transform it so it match pinsrw which expects a 16-bit value in a GR32
- // as its second argument.
- if (N1.getValueType() != MVT::i32)
- N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
- if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(IdxVal, dl);
- return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
- }
return SDValue();
}
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
SDLoc dl(Op);
MVT OpVT = Op.getSimpleValueType();
+ // It's always cheaper to replace an xor+movd pair with xorps, and doing so
+ // simplifies further combines.
+ if (X86::isZeroNode(Op.getOperand(0)))
+ return getZeroVector(OpVT, Subtarget, DAG, dl);
+
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
- unsigned SizeFactor = OpVT.getSizeInBits()/128;
+ unsigned SizeFactor = OpVT.getSizeInBits() / 128;
MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
@@ -13923,9 +14393,13 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
// Insert the 128-bit vector.
return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
+ assert(OpVT.is128BitVector() && "Expected an SSE type!");
+
+ // Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
+ if (OpVT == MVT::v4i32)
+ return Op;
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- assert(OpVT.is128BitVector() && "Expected an SSE type!");
return DAG.getBitcast(
OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
@@ -13947,20 +14421,14 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
In.getSimpleValueType().is512BitVector()) &&
"Can only extract from 256-bit or 512-bit vectors");
- if (ResVT.is128BitVector())
- return extract128BitVector(In, IdxVal, DAG, dl);
- if (ResVT.is256BitVector())
- return extract256BitVector(In, IdxVal, DAG, dl);
-
- llvm_unreachable("Unimplemented!");
-}
+ // If the input is a buildvector, just emit a smaller one.
+ unsigned ElemsPerChunk = ResVT.getVectorNumElements();
+ if (In.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT,
+ makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk));
-static bool areOnlyUsersOf(SDNode *N, ArrayRef<SDValue> ValidUsers) {
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I)
- if (llvm::all_of(ValidUsers,
- [&I](SDValue V) { return V.getNode() != *I; }))
- return false;
- return true;
+ // Everything else is legal.
+ return Op;
}
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
@@ -13968,83 +14436,9 @@ static bool areOnlyUsersOf(SDNode *N, ArrayRef<SDValue> ValidUsers) {
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget.hasAVX() && "INSERT_SUBVECTOR requires AVX");
-
- SDLoc dl(Op);
- SDValue Vec = Op.getOperand(0);
- SDValue SubVec = Op.getOperand(1);
- SDValue Idx = Op.getOperand(2);
-
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- MVT OpVT = Op.getSimpleValueType();
- MVT SubVecVT = SubVec.getSimpleValueType();
-
- if (OpVT.getVectorElementType() == MVT::i1)
- return insert1BitVector(Op, DAG, Subtarget);
-
- assert((OpVT.is256BitVector() || OpVT.is512BitVector()) &&
- "Can only insert into 256-bit or 512-bit vectors");
+ assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);
- // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
- // load:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr + 16), Elts/2)
- // --> load32 addr
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr + 32), Elts/2)
- // --> load64 addr
- // or a 16-byte or 32-byte broadcast:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr), Elts/2)
- // --> X86SubVBroadcast(load16 addr)
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr), Elts/2)
- // --> X86SubVBroadcast(load32 addr)
- if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
- Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
- auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
- if (Idx2 && Idx2->getZExtValue() == 0) {
- SDValue SubVec2 = Vec.getOperand(1);
- // If needed, look through bitcasts to get to the load.
- if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
- bool Fast;
- unsigned Alignment = FirstLd->getAlignment();
- unsigned AS = FirstLd->getAddressSpace();
- const X86TargetLowering *TLI = Subtarget.getTargetLowering();
- if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- OpVT, AS, Alignment, &Fast) && Fast) {
- SDValue Ops[] = {SubVec2, SubVec};
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
- }
- }
- // If lower/upper loads are the same and the only users of the load, then
- // lower to a VBROADCASTF128/VBROADCASTI128/etc.
- if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
- if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
- areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
- }
- }
- // If this is subv_broadcast insert into both halves, use a larger
- // subv_broadcast.
- if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
- SubVec.getOperand(0));
- }
- }
- }
-
- if (SubVecVT.is128BitVector())
- return insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
-
- if (SubVecVT.is256BitVector())
- return insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
-
- llvm_unreachable("Unimplemented!");
+ return insert1BitVector(Op, DAG, Subtarget);
}
// Returns the appropriate wrapper opcode for a global reference.
@@ -14062,7 +14456,7 @@ unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
-// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
@@ -14438,7 +14832,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Subtarget.isTargetWindowsItanium() ||
Subtarget.isTargetWindowsGNU()) {
// Just use the implicit TLS architecture
- // Need to generate someting similar to:
+ // Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
// ; from TEB
// mov ecx, dword [rel _tls_index]: Load index (from C runtime)
@@ -15489,32 +15883,21 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// word to byte only under BWI
if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8
return DAG.getNode(X86ISD::VTRUNC, DL, VT,
- DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
+ getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, DAG));
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
}
- // Truncate with PACKSS if we are truncating a vector comparison result.
- // TODO: We should be able to support other operations as long as we
- // we are saturating+packing zero/all bits only.
- auto IsPackableComparison = [](SDValue V) {
- unsigned Opcode = V.getOpcode();
- return (Opcode == X86ISD::PCMPGT || Opcode == X86ISD::PCMPEQ ||
- Opcode == X86ISD::CMPP);
- };
-
- if (IsPackableComparison(In) || (In.getOpcode() == ISD::CONCAT_VECTORS &&
- all_of(In->ops(), IsPackableComparison))) {
+ // Truncate with PACKSS if we are truncating a vector zero/all-bits result.
+ if (InVT.getScalarSizeInBits() == DAG.ComputeNumSignBits(In))
if (SDValue V = truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget))
return V;
- }
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v8i32, In);
- In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
- ShufMask);
+ In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0, DL));
}
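The v4i64 -> v4i32 path above now shuffles the input with itself instead of with undef; the mask {0, 2, 4, 6, ...} keeps the low i32 of every i64 lane. A standalone scalar sketch of that truncation, assuming little-endian layout as on x86 (names are illustrative):

```cpp
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

// Scalar model: bitcast four i64 lanes to eight i32 lanes, then keep lanes
// {0, 2, 4, 6}, i.e. the low half of each 64-bit element on little-endian.
static std::array<uint32_t, 4> truncV4I64(const std::array<uint64_t, 4> &In) {
  std::array<uint32_t, 8> As32;                // DAG.getBitcast(MVT::v8i32, In)
  std::memcpy(As32.data(), In.data(), sizeof(In));
  const int ShufMask[] = {0, 2, 4, 6};         // then take the low subvector
  std::array<uint32_t, 4> Out;
  for (unsigned i = 0; i != 4; ++i)
    Out[i] = As32[ShufMask[i]];
  return Out;
}

int main() {
  std::array<uint64_t, 4> In = {0x1111222233334444ULL, 0xAAAABBBBCCCCDDDDULL,
                                0x0123456789ABCDEFULL, 0xFFFFFFFF00000001ULL};
  std::array<uint32_t, 4> Out = truncV4I64(In);
  for (unsigned i = 0; i != 4; ++i)
    assert(Out[i] == static_cast<uint32_t>(In[i]));
  return 0;
}
```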
@@ -15530,30 +15913,20 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
- // On AVX2, v8i32 -> v8i16 becomed PSHUFB.
+ // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
In = DAG.getBitcast(MVT::v32i8, In);
- SmallVector<SDValue,32> pshufbMask;
- for (unsigned i = 0; i < 2; ++i) {
- pshufbMask.push_back(DAG.getConstant(0x0, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x1, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x4, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x5, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x8, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x9, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xc, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xd, DL, MVT::i8));
- for (unsigned j = 0; j < 8; ++j)
- pshufbMask.push_back(DAG.getConstant(0x80, DL, MVT::i8));
- }
- SDValue BV = DAG.getBuildVector(MVT::v32i8, DL, pshufbMask);
- In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
+ // The PSHUFB mask:
+ static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 16, 17, 20, 21, 24, 25, 28, 29,
+ -1, -1, -1, -1, -1, -1, -1, -1 };
+ In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
- static const int ShufMask[] = {0, 2, -1, -1};
- In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
- ShufMask);
+ static const int ShufMask2[] = {0, 2, -1, -1};
+ In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
return DAG.getBitcast(VT, In);
@@ -15572,9 +15945,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
- SDValue Undef = DAG.getUNDEF(MVT::v16i8);
- OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
@@ -15598,17 +15970,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = i * 2;
- SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In),
- DAG.getUNDEF(NVT), MaskVec);
+ In = DAG.getBitcast(NVT, In);
+ SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
DAG.getIntPtrConstant(0, DL));
}
-SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
-
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
@@ -15616,8 +15985,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
SDValue Src = Op.getOperand(0);
SDLoc dl(Op);
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
- return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI,
- dl, VT,
+ return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32)));
}
@@ -15891,7 +16259,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);
- // If more than one full vectors are evaluated, OR them first before PTEST.
+ // If more than one full vector is evaluated, OR them first before PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
// Each iteration will OR 2 nodes and append the result until there is only
// 1 node left, i.e. the final OR'd value of all vectors.
@@ -15900,8 +16268,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
- return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
- VecIns.back(), VecIns.back());
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
}
/// \brief return true if \c Op has a use that doesn't just read flags.
@@ -16366,7 +16733,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
}
/// If we have at least two divisions that use the same divisor, convert to
-/// multplication by a reciprocal. This may need to be adjusted for a given
+/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a multiplication.
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
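A tiny numeric illustration of the comment above: with two divisions sharing a divisor, one division to form the reciprocal plus two multiplies replaces two divisions. This assumes the last-ulp rounding difference between x/d and x*(1/d) is acceptable for the chosen tolerance.

```cpp
#include <cassert>
#include <cmath>

int main() {
  double A = 3.0, B = 7.5, D = 1.3;
  double Recip = 1.0 / D; // the single remaining division
  assert(std::fabs(A * Recip - A / D) < 1e-12);
  assert(std::fabs(B * Recip - B / D) < 1e-12);
  return 0;
}
```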
@@ -17241,12 +17608,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
+ // (select (and (x, 0x1) == 0), y, (z ^ y)) -> (-(and (x, 0x1)) & z) ^ y
+ // (select (and (x, 0x1) == 0), y, (z | y)) -> (-(and (x, 0x1)) & z) | y
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
-
- unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+ unsigned CondCode =
+ cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
@@ -17283,6 +17652,43 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (!isNullConstant(Op2))
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
+ } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
+ Cmp.getOperand(0).getOpcode() == ISD::AND &&
+ isOneConstant(Cmp.getOperand(0).getOperand(1))) {
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue Src1, Src2;
+ // Returns true if Op2 is an XOR or OR operator and one of its operands
+ // is equal to Op1, i.e. the pattern is (a, a op b) or (b, a op b).
+ auto isOrXorPattern = [&]() {
+ if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) &&
+ (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) {
+ Src1 =
+ Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0);
+ Src2 = Op1;
+ return true;
+ }
+ return false;
+ };
+
+ if (isOrXorPattern()) {
+ SDValue Neg;
+ unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits();
+ // We need a mask of all zeros or all ones with the same size as the
+ // other operands.
+ if (CmpSz > VT.getSizeInBits())
+ Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);
+ else if (CmpSz < VT.getSizeInBits())
+ Neg = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),
+ DAG.getConstant(1, DL, VT));
+ else
+ Neg = CmpOp0;
+ SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Neg); // -(and (x, 0x1))
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
+ return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // (Mask & z) op y
+ }
}
}
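The new non-CMov path above relies on the identity that -(x & 1) is an all-zeros or all-ones mask, so ANDing it with z either discards or keeps z before the final XOR/OR. A short standalone check of that identity (a brute-force sketch over a few values, not the DAG code):

```cpp
#include <cassert>
#include <cstdint>

// Checks:
//   ((x & 1) == 0 ? y : (z ^ y))  ==  ((-(x & 1)) & z) ^ y
//   ((x & 1) == 0 ? y : (z | y))  ==  ((-(x & 1)) & z) | y
// -(x & 1) is all-zeros when the low bit is clear and all-ones when it is set.
int main() {
  const uint32_t Vals[] = {0u, 1u, 2u, 0x7Fu, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals)
      for (uint32_t Z : Vals) {
        uint32_t Neg = 0u - (X & 1u); // all-zeros or all-ones mask
        uint32_t SelXor = ((X & 1u) == 0) ? Y : (Z ^ Y);
        uint32_t SelOr  = ((X & 1u) == 0) ? Y : (Z | Y);
        assert(SelXor == ((Neg & Z) ^ Y));
        assert(SelOr  == ((Neg & Z) | Y));
      }
  return 0;
}
```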
@@ -17423,17 +17829,10 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
// SKX processor
if ((InVTElt == MVT::i1) &&
- (((Subtarget.hasBWI() && Subtarget.hasVLX() &&
- VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
-
- ((Subtarget.hasBWI() && VT.is512BitVector() &&
- VTElt.getSizeInBits() <= 16)) ||
+ (((Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16)) ||
- ((Subtarget.hasDQI() && Subtarget.hasVLX() &&
- VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
+ ((Subtarget.hasDQI() && VTElt.getSizeInBits() >= 32))))
- ((Subtarget.hasDQI() && VT.is512BitVector() &&
- VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
unsigned NumElts = VT.getVectorNumElements();
@@ -17441,8 +17840,8 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
if (VT.is512BitVector() && InVTElt != MVT::i1 &&
(NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
- return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
- return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ return getExtendInVec(In.getOpcode(), dl, VT, In.getOperand(0), DAG);
+ return getExtendInVec(X86ISD::VSEXT, dl, VT, In, DAG);
}
if (InVTElt != MVT::i1)
@@ -17454,10 +17853,10 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
SDValue V;
if (Subtarget.hasDQI()) {
- V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
+ V = getExtendInVec(X86ISD::VSEXT, dl, ExtVT, In, DAG);
assert(!VT.is512BitVector() && "Unexpected vector type");
} else {
- SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
+ SDValue NegOne = getOnesVector(ExtVT, DAG, dl);
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
if (ExtVT == VT)
@@ -17506,11 +17905,15 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
- // SSE41 targets can use the pmovsx* instructions directly.
- unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
- X86ISD::VSEXT : X86ISD::VZEXT;
- if (Subtarget.hasSSE41())
+ // SSE41 targets can use the pmovsx* instructions directly for 128-bit
+ // results, so those are legal and shouldn't occur here. AVX2/AVX512 pmovsx*
+ // instructions still need to be handled here for 256/512-bit results.
+ if (Subtarget.hasInt256()) {
+ assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
+ unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
+ X86ISD::VSEXT : X86ISD::VZEXT;
return DAG.getNode(ExtOpc, dl, VT, In);
+ }
// We should only get here for sign extend.
assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
@@ -17595,8 +17998,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements() / 2);
- OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
- OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
+ OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
+ OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -17674,7 +18077,8 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
MVT VT = Op.getValueType().getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
- if ((Subtarget.hasVLX() && Subtarget.hasBWI() && Subtarget.hasDQI()) ||
+ if ((Subtarget.hasBWI() && NumElts >= 32) ||
+ (Subtarget.hasDQI() && NumElts < 16) ||
NumElts == 16) {
// Load and extend - everything is legal
if (NumElts < 8) {
@@ -17703,7 +18107,7 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
if (NumElts <= 8) {
// A subset, assume that we have only AVX-512F
- unsigned NumBitsToLoad = NumElts < 8 ? 8 : NumElts;
+ unsigned NumBitsToLoad = 8;
MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
Ld->getBasePtr(),
@@ -17911,7 +18315,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
if (Ext == ISD::SEXTLOAD) {
// If we have SSE4.1, we can directly emit a VSEXT node.
if (Subtarget.hasSSE41()) {
- SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Sext;
}
@@ -18469,6 +18873,11 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
SelectionDAG &DAG) {
MVT ElementType = VT.getVectorElementType();
+ // Bitcast the source vector to the output type; this is mainly necessary for
+ // vXi8/vXi64 shifts.
+ if (VT != SrcOp.getSimpleValueType())
+ SrcOp = DAG.getBitcast(VT, SrcOp);
+
// Fold this packed shift into its first operand if ShiftAmt is 0.
if (ShiftAmt == 0)
return SrcOp;
@@ -18485,9 +18894,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
&& "Unknown target vector shift-by-constant node");
// Fold this packed vector shift into a build vector if SrcOp is a
- // vector of Constants or UNDEFs, and SrcOp valuetype is the same as VT.
- if (VT == SrcOp.getSimpleValueType() &&
- ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
+ // vector of Constants or UNDEFs.
+ if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
SmallVector<SDValue, 8> Elts;
unsigned NumElts = SrcOp->getNumOperands();
ConstantSDNode *ND;
@@ -18578,11 +18986,11 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
ShAmt = ShAmt.getOperand(0);
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
- ShAmt = DAG.getNode(X86ISD::VZEXT, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
- ShAmt = DAG.getNode(X86ISD::VZEXT, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
@@ -18853,6 +19261,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue Src2 = Op.getOperand(2);
SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (!isRoundModeCurDirection(Rnd))
+ return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, VT, Src1, Src2, Rnd),
+ Mask, passThru, Subtarget, DAG);
+ }
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, passThru, Subtarget, DAG);
}
@@ -19306,6 +19722,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Src2, Src1);
return DAG.getBitcast(VT, Res);
}
+ case MASK_BINOP: {
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
+
+ SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
+ SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Src2);
+ return DAG.getBitcast(VT, Res);
+ }
case FIXUPIMMS:
case FIXUPIMMS_MASKZ:
case FIXUPIMM:
@@ -19478,6 +19903,33 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ case Intrinsic::x86_avx512_knot_w: {
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getConstant(1, dl, MVT::v16i1);
+ SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
+ return DAG.getBitcast(MVT::i16, Res);
+ }
+
+ case Intrinsic::x86_avx512_kandn_w: {
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ // Invert LHS for the not.
+ LHS = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS,
+ DAG.getConstant(1, dl, MVT::v16i1));
+ SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
+ SDValue Res = DAG.getNode(ISD::AND, dl, MVT::v16i1, LHS, RHS);
+ return DAG.getBitcast(MVT::i16, Res);
+ }
+
+ case Intrinsic::x86_avx512_kxnor_w: {
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
+ SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
+ // Invert result for the not.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, Res,
+ DAG.getConstant(1, dl, MVT::v16i1));
+ return DAG.getBitcast(MVT::i16, Res);
+ }
+
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
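In the new intrinsic lowerings above, the splatted v16i1 constant 1 is effectively an all-ones mask, so knot is an XOR with all-ones, kandn inverts its first operand before the AND, and kxnor is an XOR followed by a NOT. A scalar model on a 16-bit mask (function names are illustrative, not the intrinsics themselves):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the k-mask operations on a 16-bit register value.
static uint16_t knot_w(uint16_t A)              { return A ^ 0xFFFF; }
static uint16_t kandn_w(uint16_t A, uint16_t B) { return (A ^ 0xFFFF) & B; }
static uint16_t kxnor_w(uint16_t A, uint16_t B) { return (A ^ B) ^ 0xFFFF; }

int main() {
  assert(knot_w(0x00FF) == 0xFF00);
  assert(kandn_w(0x0F0F, 0x00FF) == 0x00F0);
  assert(kxnor_w(0x1234, 0x1234) == 0xFFFF);
  assert(kxnor_w(0xFFFF, 0x0000) == 0x0000);
  return 0;
}
```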
@@ -19603,6 +20055,28 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
}
+static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget &Subtarget) {
+ SDLoc dl(Op);
+ auto *C = cast<ConstantSDNode>(ScaleOp);
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ EVT MaskVT = Mask.getValueType();
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ // If source is undef or we know it won't be used, use a zero vector
+ // to break register dependency.
+ // TODO: use undef instead and let ExecutionDepsFix deal with it?
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
+ Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
+ SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+ return DAG.getMergeValues(RetOps, dl);
+}
+
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
@@ -19617,7 +20091,10 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- if (Src.isUndef())
+ // If source is undef or we know it won't be used, use a zero vector
+ // to break register dependency.
+ // TODO: use undef instead and let ExecutionDepsFix deal with it?
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
@@ -19656,7 +20133,6 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- //SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
return SDValue(Res, 0);
@@ -19928,6 +20404,16 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
+ case GATHER_AVX2: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Src = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
+ Scale, Chain, Subtarget);
+ }
case GATHER: {
//gather(v1, mask, index, base, scale);
SDValue Chain = Op.getOperand(0);
@@ -19953,8 +20439,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
- assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1");
- unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
+ assert((HintVal == 2 || HintVal == 3) &&
+ "Wrong prefetch hint in intrinsic: should be 2 or 3");
+ unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
@@ -20368,7 +20855,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
- const AttributeSet &Attrs = Func->getAttributes();
+ const AttributeList &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
@@ -20802,9 +21289,10 @@ static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
-static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
- if (Op.getValueType() == MVT::i1)
- return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
+static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getScalarType() == MVT::i1)
+ return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
@@ -20812,14 +21300,23 @@ static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
return Lower256IntArith(Op, DAG);
}
-static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
- if (Op.getValueType() == MVT::i1)
- return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1));
+static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return Lower256IntArith(Op, DAG);
+ MVT VT = Op.getSimpleValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Lo = extract128BitVector(Src, 0, DAG, dl);
+ SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(ISD::ABS, dl, NewVT, Lo),
+ DAG.getNode(ISD::ABS, dl, NewVT, Hi));
}
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
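LowerABS above simply splits the 256-bit source into its two 128-bit halves, takes ISD::ABS of each half, and concatenates the results. A scalar model of that split-and-concat on eight i32 lanes (illustrative, not the DAG code):

```cpp
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdlib>

// Scalar model: extract the low and high halves, take abs of each half, then
// concatenate, mirroring extract128BitVector + ISD::ABS + CONCAT_VECTORS.
static std::array<int32_t, 8> absV8I32(const std::array<int32_t, 8> &Src) {
  std::array<int32_t, 4> Lo, Hi;
  for (unsigned i = 0; i != 4; ++i) {
    Lo[i] = std::abs(Src[i]);
    Hi[i] = std::abs(Src[i + 4]);
  }
  std::array<int32_t, 8> Result;
  for (unsigned i = 0; i != 4; ++i) {
    Result[i] = Lo[i];
    Result[i + 4] = Hi[i];
  }
  return Result;
}

int main() {
  std::array<int32_t, 8> In = {-1, 2, -3, 4, -5, 6, -7, 8};
  std::array<int32_t, 8> Out = absV8I32(In);
  for (unsigned i = 0; i != 8; ++i)
    assert(Out[i] == std::abs(In[i]));
  return 0;
}
```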
@@ -20834,7 +21331,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- if (VT == MVT::i1)
+ if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
// Decompose 256-bit ops into smaller 128-bit ops.
@@ -20874,8 +21371,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Extract the lo parts and sign extend to i16
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
- ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A);
- BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B);
+ ALo = DAG.getSignExtendVectorInReg(A, dl, ExVT);
+ BLo = DAG.getSignExtendVectorInReg(B, dl, ExVT);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
@@ -20894,8 +21391,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, AHi);
- BHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, BHi);
+ AHi = DAG.getSignExtendVectorInReg(AHi, dl, ExVT);
+ BHi = DAG.getSignExtendVectorInReg(BHi, dl, ExVT);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
@@ -21056,8 +21553,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
}
- SDValue ExA = DAG.getNode(ExSSE41, dl, MVT::v16i16, A);
- SDValue ExB = DAG.getNode(ExSSE41, dl, MVT::v16i16, B);
+ SDValue ExA = getExtendInVec(ExSSE41, dl, MVT::v16i16, A, DAG);
+ SDValue ExB = getExtendInVec(ExSSE41, dl, MVT::v16i16, B, DAG);
SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
DAG.getConstant(8, dl, MVT::v16i16));
@@ -21073,8 +21570,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Extract the lo parts and zero/sign extend to i16.
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
- ALo = DAG.getNode(ExSSE41, dl, ExVT, A);
- BLo = DAG.getNode(ExSSE41, dl, ExVT, B);
+ ALo = getExtendInVec(ExSSE41, dl, ExVT, A, DAG);
+ BLo = getExtendInVec(ExSSE41, dl, ExVT, B, DAG);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
@@ -21093,8 +21590,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi);
- BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi);
+ AHi = getExtendInVec(ExSSE41, dl, ExVT, AHi, DAG);
+ BHi = getExtendInVec(ExSSE41, dl, ExVT, BHi, DAG);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
@@ -21148,8 +21645,8 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
MachinePointerInfo(), /* Alignment = */ 16);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Ty = PointerType::get(ArgTy,0);
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
}
@@ -21157,11 +21654,15 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
getPointerTy(DAG.getDataLayout()));
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(getLibcallCallingConv(LC),
- static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
- Callee, std::move(Args))
- .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(
+ getLibcallCallingConv(LC),
+ static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee,
+ std::move(Args))
+ .setInRegister()
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return DAG.getBitcast(VT, CallInfo.first);
@@ -21269,15 +21770,15 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
if (VT.getScalarSizeInBits() < 16)
return false;
- if (VT.is512BitVector() &&
+ if (VT.is512BitVector() && Subtarget.hasAVX512() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
- bool LShift = VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasInt256());
+ bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (VT.is256BitVector() && Subtarget.hasInt256());
- bool AShift = LShift && (Subtarget.hasVLX() ||
- (VT != MVT::v2i64 && VT != MVT::v4i64));
+ bool AShift = LShift && (Subtarget.hasAVX512() ||
+ (VT != MVT::v2i64 && VT != MVT::v4i64));
return (Opcode == ISD::SRA) ? AShift : LShift;
}
@@ -21301,7 +21802,7 @@ static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
- if (VT.is512BitVector() || Subtarget.hasVLX())
+ if (Subtarget.hasAVX512())
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
@@ -22062,10 +22563,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// A subtract of one will be selected as a INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
if (isOneConstant(RHS)) {
- BaseOp = X86ISD::INC;
- Cond = X86::COND_O;
- break;
- }
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
@@ -22077,10 +22578,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
if (isOneConstant(RHS)) {
- BaseOp = X86ISD::DEC;
- Cond = X86::COND_O;
- break;
- }
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
@@ -22470,7 +22971,7 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
// index into a in-register pre-computed pop count table. We then split up the
// input vector in two new ones: (1) a vector with only the shifted-right
// higher nibbles for each byte and (2) a vector with the lower nibbles (and
- // masked out higher ones) for each byte. PSHUB is used separately with both
+ // masked out higher ones) for each byte. PSHUFB is used separately with both
// to index the in-register table. Next, both are added and the result is a
// i8 vector where each element contains the pop count for input byte.
//
@@ -22867,8 +23368,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
bool isF64 = ArgVT == MVT::f64;
@@ -22885,8 +23386,9 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
: (Type*)VectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, RetTy, Callee, std::move(Args));
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -23086,7 +23588,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
- "We handle 4x32, 4x64 and 2x64 vectors only in this casse");
+ "We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
@@ -23142,7 +23644,7 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
- "We handle 4x32, 4x64 and 2x64 vectors only in this casse");
+ "We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
@@ -23202,7 +23704,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
- // The pass-thru value
+ // The pass-through value
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
Src0 = ExtendToType(Src0, NewVT, DAG);
@@ -23284,7 +23786,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
- case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -23303,7 +23805,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, Subtarget, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
case ISD::FABS:
@@ -23360,12 +23862,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
- case ISD::ADD: return LowerADD(Op, DAG);
- case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::ADD:
+ case ISD::SUB: return LowerADD_SUB(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -23768,7 +24271,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
- case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
@@ -23779,16 +24281,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
- case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMAXS: return "X86ISD::FMAXS";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
+ case X86ISD::FMAXS_RND: return "X86ISD::FMAXS_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMINS: return "X86ISD::FMINS";
case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND";
+ case X86ISD::FMINS_RND: return "X86ISD::FMINS_RND";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
- case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
+ case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::FRCPS: return "X86ISD::FRCPS";
case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
@@ -23827,7 +24332,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
- case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
@@ -23876,6 +24380,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
+ case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
+ case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR";
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
@@ -23976,9 +24482,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
+ case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND";
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
+ case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
+ case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
+ case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
@@ -24302,7 +24812,7 @@ static MachineBasicBlock *emitPCMPSTRM(MachineInstr &MI, MachineBasicBlock *BB,
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
- MIB.addOperand(Op);
+ MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -24338,7 +24848,7 @@ static MachineBasicBlock *emitPCMPSTRI(MachineInstr &MI, MachineBasicBlock *BB,
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
- MIB.addOperand(Op);
+ MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -24398,7 +24908,7 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
unsigned ValOps = X86::AddrNumOperands;
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
@@ -24413,6 +24923,26 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
+static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
+ const X86Subtarget &Subtarget) {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ // Address into RAX/EAX
+ unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.add(MI->getOperand(i));
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+
+
MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -24536,12 +25066,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, UseFPOffset ? 4 : 0)
- .addOperand(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .add(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
@@ -24561,12 +25091,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Read the reg_save_area address.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, 16)
- .addOperand(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, 16)
+ .add(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
@@ -24588,13 +25118,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Store it back into the va_list.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, UseFPOffset ? 4 : 0)
- .addOperand(Segment)
- .addReg(NextOffsetReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .add(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
@@ -24608,12 +25138,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Load the overflow_area address into a register.
unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, 8)
- .addOperand(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, 8)
+ .add(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
@@ -24644,13 +25174,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Store the new overflow address.
BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, 8)
- .addOperand(Segment)
- .addReg(NextAddrReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, 8)
+ .add(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
@@ -24867,7 +25397,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
//
// (CMOV (CMOV F, T, cc1), T, cc2)
//
- // to two successives branches. For that, we look for another CMOV as the
+ // to two successive branches. For that, we look for another CMOV as the
// following instruction.
//
// Without this, we would add a PHI between the two jumps, which ends up
@@ -25123,12 +25653,12 @@ X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
// instruction using the same address operands.
if (Operand.isReg())
Operand.setIsKill(false);
- MIB.addOperand(Operand);
+ MIB.add(Operand);
}
MachineInstr *FOpMI = MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -25508,7 +26038,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
else
- MIB.addOperand(MI.getOperand(MemOpndSlot + i));
+ MIB.add(MI.getOperand(MemOpndSlot + i));
}
if (!UseImmLabel)
MIB.addReg(LabelReg);
@@ -25591,7 +26121,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
// Reload FP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload IP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
@@ -25599,7 +26129,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), LabelOffset);
else
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload SP
@@ -25608,7 +26138,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Jump
@@ -25625,7 +26155,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
@@ -25644,8 +26174,6 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
VR = MRI->createVirtualRegister(TRC);
Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
- /* const X86InstrInfo *XII = static_cast<const X86InstrInfo *>(TII); */
-
if (Subtarget.is64Bit())
BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
.addReg(X86::RIP)
@@ -25655,7 +26183,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
.addReg(0);
else
BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
- .addReg(0) /* XII->getGlobalBaseReg(MF) */
+ .addReg(0) /* TII->getGlobalBaseReg(MF) */
.addImm(1)
.addReg(0)
.addMBB(DispatchBB, Subtarget.classifyBlockAddressReference())
@@ -25677,7 +26205,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineFunction *MF = BB->getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
int FI = MFI.getFunctionContextIndex();
// Get a mapping of the call site numbers to all of the landing pads they're
@@ -25749,9 +26277,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MF->getOrCreateJumpTableInfo(getJumpTableEncoding());
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
- const X86InstrInfo *XII = static_cast<const X86InstrInfo *>(TII);
- const X86RegisterInfo &RI = XII->getRegisterInfo();
-
+ const X86RegisterInfo &RI = TII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
// registers being marked as clobbered.
if (RI.hasBasePointer(*MF)) {
@@ -25799,8 +26325,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// N.B. the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
- const MCPhysReg *SavedRegs =
- Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF);
+ const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
@@ -26033,6 +26558,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
case X86::MONITORX:
return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
+
+ // Cache line zero
+ case X86::CLZERO:
+ return emitClzero(&MI, BB, Subtarget);
+
// PKU feature
case X86::WRPKRU:
return emitWRPKRU(MI, BB, Subtarget);
@@ -26137,10 +26667,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
@@ -26167,44 +26699,91 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
LLVM_FALLTHROUGH;
case X86ISD::SETCC:
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBits(1, BitWidth);
break;
case X86ISD::MOVMSK: {
unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
+ KnownZero.setBits(NumLoBits, BitWidth);
+ break;
+ }
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI: {
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ break;
+ }
+
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ if (Opc == X86ISD::VSHLI) {
+ KnownZero = KnownZero << ShAmt;
+ KnownOne = KnownOne << ShAmt;
+ // Low bits are known zero.
+ KnownZero.setLowBits(ShAmt);
+ } else {
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+ // High bits are known zero.
+ KnownZero.setHighBits(ShAmt);
+ }
+ }
break;
}
case X86ISD::VZEXT: {
SDValue N0 = Op.getOperand(0);
- unsigned NumElts = Op.getValueType().getVectorNumElements();
- unsigned InNumElts = N0.getValueType().getVectorNumElements();
- unsigned InBitWidth = N0.getValueType().getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT SrcVT = N0.getValueType();
+ unsigned InNumElts = SrcVT.getVectorNumElements();
+ unsigned InBitWidth = SrcVT.getScalarSizeInBits();
+ assert(InNumElts >= NumElts && "Illegal VZEXT input");
KnownZero = KnownOne = APInt(InBitWidth, 0);
- APInt DemandedElts = APInt::getLowBitsSet(InNumElts, NumElts);
- DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+ APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
+ DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedSrcElts, Depth + 1);
KnownOne = KnownOne.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - InBitWidth);
+ KnownZero.setBits(InBitWidth, BitWidth);
break;
}
}
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
- SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
- // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
- if (Op.getOpcode() == X86ISD::SETCC_CARRY)
- return Op.getScalarValueSizeInBits();
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned VTBits = Op.getScalarValueSizeInBits();
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case X86ISD::SETCC_CARRY:
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ return VTBits;
- if (Op.getOpcode() == X86ISD::VSEXT) {
- EVT VT = Op.getValueType();
- EVT SrcVT = Op.getOperand(0).getValueType();
- unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- Tmp += VT.getScalarSizeInBits() - SrcVT.getScalarSizeInBits();
+ case X86ISD::VSEXT: {
+ SDValue Src = Op.getOperand(0);
+ unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+ Tmp += VTBits - Src.getScalarValueSizeInBits();
return Tmp;
}
+ case X86ISD::VSRAI: {
+ SDValue Src = Op.getOperand(0);
+ unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+ APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ ShiftVal += Tmp;
+ return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
+ }
+
+ case X86ISD::PCMPGT:
+ case X86ISD::PCMPEQ:
+ case X86ISD::CMPP:
+ case X86ISD::VPCOM:
+ case X86ISD::VPCOMU:
+ // Vector compares return zero/all-bits result values.
+ return VTBits;
+ }
+
// Fallback case.
return 1;
}
@@ -26228,24 +26807,17 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
- // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
- if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
- isUndefOrEqual(Mask[0], 0) &&
- isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
- Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
- return true;
- }
-
- // Match against a VZEXT instruction.
- // TODO: Add 256/512-bit vector support.
- if (!FloatDomain && MaskVT.is128BitVector() && Subtarget.hasSSE41()) {
+ // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
+ // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool Match = true;
@@ -26255,19 +26827,32 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
}
if (Match) {
- SrcVT = MaskVT;
+ unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
+ SrcVT = MVT::getVectorVT(MaskVT.getScalarType(), SrcSize / MaskEltSize);
+ if (SrcVT != MaskVT)
+ V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
- Shuffle = X86ISD::VZEXT;
+ Shuffle = SrcVT != MaskVT ? unsigned(X86ISD::VZEXT)
+ : unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
return true;
}
}
}
+ // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
+ if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
+ isUndefOrEqual(Mask[0], 0) &&
+ isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
+ Shuffle = X86ISD::VZEXT_MOVL;
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ return true;
+ }
+
// Check if we have SSE3 which will let us use MOVDDUP etc. The
// instructions are no slower than UNPCKLPD but have the option to
// fold the input operand into even an unaligned memory load.
- if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && FloatDomain) {
+ if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
@@ -26285,7 +26870,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- if (MaskVT.is256BitVector() && FloatDomain) {
+ if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVDDUP;
@@ -26304,7 +26889,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- if (MaskVT.is512BitVector() && FloatDomain) {
+ if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
@@ -26343,24 +26928,26 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
+ bool AllowFloatDomain,
+ bool AllowIntDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
bool ContainsZeros = false;
- SmallBitVector Zeroable(NumMaskElts, false);
+ APInt Zeroable(NumMaskElts, false);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
- Zeroable[i] = isUndefOrZero(M);
+ if (isUndefOrZero(M))
+ Zeroable.setBit(i);
ContainsZeros |= (M == SM_SentinelZero);
}
// Attempt to match against byte/bit shifts.
// FIXME: Add 512-bit support.
- if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
MaskVT.getScalarSizeInBits(), Mask,
0, Zeroable, Subtarget);
@@ -26423,19 +27010,21 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
// had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- if (FloatDomain && !Subtarget.hasAVX())
+ if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX())
return false;
// Pre-AVX2 we must use float shuffles on 256-bit vectors.
- if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
- FloatDomain = true;
+ if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) {
+ AllowFloatDomain = true;
+ AllowIntDomain = false;
+ }
// Check for lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
// PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) {
Shuffle = X86ISD::VPERMI;
- ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
+ ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
PermuteImm = getV4X86ShuffleImm(Mask);
return true;
}
@@ -26443,7 +27032,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
Shuffle = X86ISD::VPERMI;
- ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
+ ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
PermuteImm = getV4X86ShuffleImm(RepeatedMask);
return true;
}
@@ -26452,7 +27041,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// VPERMILPD can permute with a non-repeating shuffle.
- if (FloatDomain && MaskScalarSizeInBits == 64) {
+ if (AllowFloatDomain && MaskScalarSizeInBits == 64) {
Shuffle = X86ISD::VPERMILPI;
ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
PermuteImm = 0;
@@ -26476,8 +27065,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskScalarSizeInBits == 64)
scaleShuffleMask(2, RepeatedMask, WordMask);
- Shuffle = (FloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
- ShuffleVT = (FloatDomain ? MVT::f32 : MVT::i32);
+ Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
+ ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32);
ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
PermuteImm = getV4X86ShuffleImm(WordMask);
return true;
@@ -26487,34 +27076,36 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain, SDValue &V1, SDValue &V2,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, SDValue &V2, SDLoc &DL,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
- if (isTargetShuffleEquivalent(Mask, {0, 0}) && FloatDomain) {
+ if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVLHPS;
ShuffleVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {1, 1}) && FloatDomain) {
+ if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVHLPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
- (FloatDomain || !Subtarget.hasSSE41())) {
+ (AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
ShuffleVT = MaskVT;
return true;
}
if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
- (FloatDomain || !Subtarget.hasSSE41())) {
+ (AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
ShuffleVT = MaskVT;
return true;
@@ -26527,57 +27118,12 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
- MVT LegalVT = MaskVT;
- if (LegalVT.is256BitVector() && !Subtarget.hasAVX2())
- LegalVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
-
- SmallVector<int, 64> Unpckl, Unpckh;
- if (IsUnary) {
- createUnpackShuffleMask(MaskVT, Unpckl, true, true);
- if (isTargetShuffleEquivalent(Mask, Unpckl)) {
- V2 = V1;
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = LegalVT;
- return true;
- }
-
- createUnpackShuffleMask(MaskVT, Unpckh, false, true);
- if (isTargetShuffleEquivalent(Mask, Unpckh)) {
- V2 = V1;
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = LegalVT;
- return true;
- }
- } else {
- createUnpackShuffleMask(MaskVT, Unpckl, true, false);
- if (isTargetShuffleEquivalent(Mask, Unpckl)) {
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = LegalVT;
- return true;
- }
-
- createUnpackShuffleMask(MaskVT, Unpckh, false, false);
- if (isTargetShuffleEquivalent(Mask, Unpckh)) {
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = LegalVT;
- return true;
- }
-
- ShuffleVectorSDNode::commuteMask(Unpckl);
- if (isTargetShuffleEquivalent(Mask, Unpckl)) {
- std::swap(V1, V2);
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = LegalVT;
- return true;
- }
-
- ShuffleVectorSDNode::commuteMask(Unpckh);
- if (isTargetShuffleEquivalent(Mask, Unpckh)) {
- std::swap(V1, V2);
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = LegalVT;
- return true;
- }
+ if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL,
+ DAG, Subtarget)) {
+ ShuffleVT = MaskVT;
+ if (ShuffleVT.is256BitVector() && !Subtarget.hasAVX2())
+ ShuffleVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
+ return true;
}
}
@@ -26585,17 +27131,19 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
- SDValue &V1, SDValue &V2,
- SDLoc &DL, SelectionDAG &DAG,
+ bool AllowFloatDomain,
+ bool AllowIntDomain,
+ SDValue &V1, SDValue &V2, SDLoc &DL,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
+ unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
// Attempt to match against PALIGNR byte rotate.
- if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
@@ -26606,77 +27154,74 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Attempt to combine to X86ISD::BLENDI.
- if (NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
- (Subtarget.hasAVX() && MaskVT.is256BitVector()))) {
- // Determine a type compatible with X86ISD::BLENDI.
- // TODO - add 16i16 support (requires lane duplication).
- MVT BlendVT = MaskVT;
- if (Subtarget.hasAVX2()) {
- if (BlendVT == MVT::v4i64)
- BlendVT = MVT::v8i32;
- else if (BlendVT == MVT::v2i64)
- BlendVT = MVT::v4i32;
- } else {
- if (BlendVT == MVT::v2i64 || BlendVT == MVT::v4i32)
- BlendVT = MVT::v8i16;
- else if (BlendVT == MVT::v4i64)
- BlendVT = MVT::v4f64;
- else if (BlendVT == MVT::v8i32)
- BlendVT = MVT::v8f32;
- }
-
- unsigned BlendSize = BlendVT.getVectorNumElements();
- unsigned MaskRatio = BlendSize / NumMaskElts;
-
- // Can we blend with zero?
- if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
- /*Low*/ 0) &&
- NumMaskElts <= BlendVT.getVectorNumElements()) {
- PermuteImm = 0;
- for (unsigned i = 0; i != BlendSize; ++i)
- if (Mask[i / MaskRatio] < 0)
- PermuteImm |= 1u << i;
-
- V2 = getZeroVector(BlendVT, Subtarget, DAG, DL);
- Shuffle = X86ISD::BLENDI;
- ShuffleVT = BlendVT;
- return true;
- }
-
- // Attempt to match as a binary blend.
- if (NumMaskElts <= BlendVT.getVectorNumElements()) {
- bool MatchBlend = true;
- for (int i = 0; i != (int)NumMaskElts; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
- continue;
- else if (M == SM_SentinelZero)
- MatchBlend = false;
- else if ((M != i) && (M != (i + (int)NumMaskElts)))
- MatchBlend = false;
- }
+ if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
+ (Subtarget.hasAVX() && MaskVT.is256BitVector()))) ||
+ (MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) {
+ uint64_t BlendMask = 0;
+ bool ForceV1Zero = false, ForceV2Zero = false;
+ SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
+ if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
+ BlendMask)) {
+ if (MaskVT == MVT::v16i16) {
+ // We can only use v16i16 PBLENDW if the lanes are repeated.
+ SmallVector<int, 8> RepeatedMask;
+ if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask,
+ RepeatedMask)) {
+ assert(RepeatedMask.size() == 8 &&
+ "Repeated mask size doesn't match!");
+ PermuteImm = 0;
+ for (int i = 0; i < 8; ++i)
+ if (RepeatedMask[i] >= 8)
+ PermuteImm |= 1 << i;
+ V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
+ V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
+ Shuffle = X86ISD::BLENDI;
+ ShuffleVT = MaskVT;
+ return true;
+ }
+ } else {
+ // Determine a type compatible with X86ISD::BLENDI.
+ ShuffleVT = MaskVT;
+ if (Subtarget.hasAVX2()) {
+ if (ShuffleVT == MVT::v4i64)
+ ShuffleVT = MVT::v8i32;
+ else if (ShuffleVT == MVT::v2i64)
+ ShuffleVT = MVT::v4i32;
+ } else {
+ if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
+ ShuffleVT = MVT::v8i16;
+ else if (ShuffleVT == MVT::v4i64)
+ ShuffleVT = MVT::v4f64;
+ else if (ShuffleVT == MVT::v8i32)
+ ShuffleVT = MVT::v8f32;
+ }
- if (MatchBlend) {
- PermuteImm = 0;
- for (unsigned i = 0; i != BlendSize; ++i)
- if ((int)NumMaskElts <= Mask[i / MaskRatio])
- PermuteImm |= 1u << i;
+ if (!ShuffleVT.isFloatingPoint()) {
+ int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
+ BlendMask =
+ scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
+ ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
+ }
+ V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
+ V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
+ PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
- ShuffleVT = BlendVT;
return true;
}
}
}
// Attempt to combine to INSERTPS.
- if (Subtarget.hasSSE41() && MaskVT == MVT::v4f32) {
- SmallBitVector Zeroable(4, false);
+ if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
+ MaskVT.is128BitVector()) {
+ APInt Zeroable(4, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] < 0)
- Zeroable[i] = true;
+ Zeroable.setBit(i);
- if (Zeroable.any() &&
+ if (Zeroable.getBoolValue() &&
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
@@ -26685,22 +27230,26 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Attempt to combine to SHUFPD.
- if ((MaskVT == MVT::v2f64 && Subtarget.hasSSE2()) ||
- (MaskVT == MVT::v4f64 && Subtarget.hasAVX()) ||
- (MaskVT == MVT::v8f64 && Subtarget.hasAVX512())) {
+ if (AllowFloatDomain && EltSizeInBits == 64 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
Shuffle = X86ISD::SHUFP;
- ShuffleVT = MaskVT;
+ ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
return true;
}
}
// Attempt to combine to SHUFPS.
- if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
- (MaskVT == MVT::v16f32 && Subtarget.hasAVX512())) {
+ if (AllowFloatDomain && EltSizeInBits == 32 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE1()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
SmallVector<int, 4> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
+      // Match each half of the repeated mask to determine if it's just
+ // referencing one of the vectors, is zeroable or entirely undef.
auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
int M0 = RepeatedMask[Offset];
int M1 = RepeatedMask[Offset + 1];
@@ -26732,7 +27281,7 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
V1 = Lo;
V2 = Hi;
Shuffle = X86ISD::SHUFP;
- ShuffleVT = MaskVT;
+ ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32);
PermuteImm = getV4X86ShuffleImm(ShufMask);
return true;
}
@@ -26764,7 +27313,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// here, we're not going to remove the operands we find.
bool UnaryShuffle = (Inputs.size() == 1);
SDValue V1 = peekThroughBitcasts(Inputs[0]);
- SDValue V2 = (UnaryShuffle ? V1 : peekThroughBitcasts(Inputs[1]));
+ SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType())
+ : peekThroughBitcasts(Inputs[1]));
MVT VT1 = V1.getSimpleValueType();
MVT VT2 = V2.getSimpleValueType();
@@ -26853,6 +27403,11 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
+ // Which shuffle domains are permitted?
+ // Permit domain crossing at higher combine depths.
+ bool AllowFloatDomain = FloatDomain || (Depth > 3);
+ bool AllowIntDomain = !FloatDomain || (Depth > 3);
+
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
// directly if we don't shuffle the lower element and we shuffle the upper
@@ -26869,8 +27424,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
- ShuffleSrcVT, ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
+ V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+ ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26884,8 +27440,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget,
- Shuffle, ShuffleVT, PermuteImm)) {
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ AllowIntDomain, Subtarget, Shuffle,
+ ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26901,8 +27458,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget,
- Shuffle, ShuffleVT, UnaryShuffle)) {
+ if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
+ V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT,
+ UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26918,8 +27476,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL,
- DAG, Subtarget, Shuffle, ShuffleVT,
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ AllowIntDomain, V1, V2, DL, DAG,
+ Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
@@ -27039,12 +27598,12 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
- SmallBitVector UndefElts(NumMaskElts, false);
+ APInt UndefElts(NumMaskElts, 0);
SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
continue;
}
if (M == SM_SentinelZero)
@@ -27228,8 +27787,8 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
// Extract constant bits from each source op.
bool OneUseConstantOp = false;
- SmallVector<SmallBitVector, 4> UndefEltsOps(NumOps);
- SmallVector<SmallVector<APInt, 8>, 4> RawBitsOps(NumOps);
+ SmallVector<APInt, 16> UndefEltsOps(NumOps);
+ SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
for (unsigned i = 0; i != NumOps; ++i) {
SDValue SrcOp = Ops[i];
OneUseConstantOp |= SrcOp.hasOneUse();
@@ -27245,18 +27804,18 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
return false;
// Shuffle the constant bits according to the mask.
- SmallBitVector UndefElts(NumMaskElts, false);
- SmallBitVector ZeroElts(NumMaskElts, false);
- SmallBitVector ConstantElts(NumMaskElts, false);
+ APInt UndefElts(NumMaskElts, 0);
+ APInt ZeroElts(NumMaskElts, 0);
+ APInt ConstantElts(NumMaskElts, 0);
SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
APInt::getNullValue(MaskSizeInBits));
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
continue;
} else if (M == SM_SentinelZero) {
- ZeroElts[i] = true;
+ ZeroElts.setBit(i);
continue;
}
assert(0 <= M && M < (int)(NumMaskElts * NumOps));
@@ -27266,21 +27825,21 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
if (SrcUndefElts[SrcMaskIdx]) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
continue;
}
auto &SrcEltBits = RawBitsOps[SrcOpIdx];
APInt &Bits = SrcEltBits[SrcMaskIdx];
if (!Bits) {
- ZeroElts[i] = true;
+ ZeroElts.setBit(i);
continue;
}
- ConstantElts[i] = true;
+ ConstantElts.setBit(i);
ConstantBitData[i] = Bits;
}
- assert((UndefElts | ZeroElts | ConstantElts).count() == NumMaskElts);
+ assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
// Create the constant data.
MVT MaskSVT;
@@ -27330,6 +27889,7 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask,
+ ArrayRef<const SDNode*> SrcNodes,
int Depth, bool HasVariableMask,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -27353,13 +27913,17 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
- SDValue Input0, Input1;
- SmallVector<int, 16> OpMask;
- if (!resolveTargetShuffleInputs(Op, Input0, Input1, OpMask))
+ SmallVector<int, 64> OpMask;
+ SmallVector<SDValue, 2> OpInputs;
+ if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
return false;
+ assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
+ SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
+ SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
+
// Add the inputs to the Ops list, avoiding duplicates.
- SmallVector<SDValue, 8> Ops(SrcOps.begin(), SrcOps.end());
+ SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
int InputIdx0 = -1, InputIdx1 = -1;
for (int i = 0, e = Ops.size(); i < e; ++i) {
@@ -27392,8 +27956,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
(RootRatio == 1) != (OpRatio == 1)) &&
"Must not have a ratio for both incoming and op masks!");
- SmallVector<int, 16> Mask;
- Mask.reserve(MaskWidth);
+ SmallVector<int, 64> Mask((unsigned)MaskWidth, SM_SentinelUndef);
// Merge this shuffle operation's mask into our accumulated mask. Note that
// this shuffle's mask will be the first applied to the input, followed by the
@@ -27403,7 +27966,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
int RootIdx = i / RootRatio;
if (RootMask[RootIdx] < 0) {
// This is a zero or undef lane, we're done.
- Mask.push_back(RootMask[RootIdx]);
+ Mask[i] = RootMask[RootIdx];
continue;
}
@@ -27413,7 +27976,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// than the SrcOp we're currently inserting.
if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
(((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
- Mask.push_back(RootMaskedIdx);
+ Mask[i] = RootMaskedIdx;
continue;
}
@@ -27423,7 +27986,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef, it doesn't matter which ones we
// are using.
- Mask.push_back(OpMask[OpIdx]);
+ Mask[i] = OpMask[OpIdx];
continue;
}
@@ -27439,7 +28002,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
OpMaskedIdx += InputIdx1 * MaskWidth;
}
- Mask.push_back(OpMaskedIdx);
+ Mask[i] = OpMaskedIdx;
}
// Handle the all undef/zero cases early.
@@ -27457,28 +28020,25 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
}
// Remove unused shuffle source ops.
- SmallVector<SDValue, 8> UsedOps;
- for (int i = 0, e = Ops.size(); i < e; ++i) {
- int lo = UsedOps.size() * MaskWidth;
- int hi = lo + MaskWidth;
- if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
- UsedOps.push_back(Ops[i]);
- continue;
- }
- for (int &M : Mask)
- if (lo <= M)
- M -= MaskWidth;
- }
- assert(!UsedOps.empty() && "Shuffle with no inputs detected");
- Ops = UsedOps;
+ resolveTargetShuffleInputsAndMask(Ops, Mask);
+ assert(!Ops.empty() && "Shuffle with no inputs detected");
HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());
- // See if we can recurse into each shuffle source op (if it's a target shuffle).
+ // Update the list of shuffle nodes that have been combined so far.
+ SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
+ SrcNodes.end());
+ CombinedNodes.push_back(Op.getNode());
+
+ // See if we can recurse into each shuffle source op (if it's a target
+ // shuffle). The source op should only be combined if it either has a
+ // single use (i.e. current Op) or all its users have already been combined.
for (int i = 0, e = Ops.size(); i < e; ++i)
- if (Ops[i].getNode()->hasOneUse() || Op->isOnlyUserOf(Ops[i].getNode()))
- if (combineX86ShufflesRecursively(Ops, i, Root, Mask, Depth + 1,
- HasVariableMask, DAG, DCI, Subtarget))
+ if (Ops[i].getNode()->hasOneUse() ||
+ SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
+ if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes,
+ Depth + 1, HasVariableMask, DAG, DCI,
+ Subtarget))
return true;
// Attempt to constant fold all of the constant source ops.
@@ -27495,7 +28055,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// elements, and shrink them to the half-width mask. It does this in a loop
// so it will reduce the size of the mask to the minimal width mask which
// performs an equivalent shuffle.
- SmallVector<int, 16> WidenedMask;
+ SmallVector<int, 64> WidenedMask;
while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
Mask = std::move(WidenedMask);
}
@@ -27561,8 +28121,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
/// altering anything.
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
@@ -27842,19 +28401,20 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
- bool isFloat = VT.isFloatingPoint();
SDValue V0 = peekThroughBitcasts(N->getOperand(0));
SDValue V1 = peekThroughBitcasts(N->getOperand(1));
- bool isFloat0 = V0.getSimpleValueType().isFloatingPoint();
- bool isFloat1 = V1.getSimpleValueType().isFloatingPoint();
bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode());
bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode());
- assert(!(isZero0 && isZero1) && "Zeroable shuffle detected.");
+ if (isZero0 && isZero1)
+ return SDValue();
// We often lower to MOVSD/MOVSS from integer as well as native float
// types; remove unnecessary domain-crossing bitcasts if we can to make it
// easier to combine shuffles later on. We've already accounted for the
// domain switching cost when we decided to lower with it.
+ bool isFloat = VT.isFloatingPoint();
+ bool isFloat0 = V0.getSimpleValueType().isFloatingPoint();
+ bool isFloat1 = V1.getSimpleValueType().isFloatingPoint();
if ((isFloat != isFloat0 || isZero0) && (isFloat != isFloat1 || isZero1)) {
MVT NewVT = isFloat ? (X86ISD::MOVSD == Opcode ? MVT::v2i64 : MVT::v4i32)
: (X86ISD::MOVSD == Opcode ? MVT::v2f64 : MVT::v4f32);
@@ -28025,7 +28585,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFD:
- if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG, DCI))
+ if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG))
return NewN;
break;
@@ -28173,12 +28733,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
-
- // Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
- return SDValue();
-
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT))
@@ -28249,11 +28804,18 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
+ Elts.push_back(Elt);
+ continue;
+ }
+ Elts.clear();
+ break;
+ }
- if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
- return LD;
+ if (Elts.size() == VT.getVectorNumElements())
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
+ return LD;
// For AVX2, we sometimes want to combine
// (vector_shuffle <mask> (concat_vectors t1, undef)
@@ -28276,7 +28838,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// a particular chain.
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
@@ -28303,18 +28865,13 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EVT OriginalVT = InVec.getValueType();
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
- EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() ||
- BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
- return SDValue();
- InVec = InVec.getOperand(0);
- }
+ // Peek through bitcasts, don't duplicate a load with other uses.
+ InVec = peekThroughOneUseBitcasts(InVec);
EVT CurrentVT = InVec.getValueType();
+ if (!CurrentVT.isVector() ||
+ CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
+ return SDValue();
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
@@ -28393,19 +28950,41 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ // Since MMX types are special and don't usually play with other vector types,
+ // it's better to handle them early to be sure we emit efficient code by
+ // avoiding store-load conversions.
- // Detect bitcasts between i32 to x86mmx low word. Since MMX types are
- // special and don't usually play with other vector types, it's better to
- // handle them early to be sure we emit efficient code by avoiding
- // store-load conversions.
+  // Detect bitcasts from i32 to x86mmx low word.
if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
- N0.getValueType() == MVT::v2i32 &&
- isNullConstant(N0.getOperand(1))) {
+ SrcVT == MVT::v2i32 && isNullConstant(N0.getOperand(1))) {
SDValue N00 = N0->getOperand(0);
if (N00.getValueType() == MVT::i32)
return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
}
+  // Detect bitcasts from element or subvector extractions to x86mmx.
+ if (VT == MVT::x86mmx &&
+ (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) &&
+ isNullConstant(N0.getOperand(1))) {
+ SDValue N00 = N0->getOperand(0);
+ if (N00.getValueType().is128BitVector())
+ return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT,
+ DAG.getBitcast(MVT::v2i64, N00));
+ }
+
+ // Detect bitcasts from FP_TO_SINT to x86mmx.
+ if (VT == MVT::x86mmx && SrcVT == MVT::v2i32 &&
+ N0.getOpcode() == ISD::FP_TO_SINT) {
+ SDLoc DL(N0);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getUNDEF(MVT::v2i32));
+ return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT,
+ DAG.getBitcast(MVT::v2i64, Res));
+ }
+
// Convert a bitcasted integer logic operation that has one bitcasted
// floating-point operand into a floating-point logic operation. This may
// create a load of a constant, but that is cheaper than materializing the
@@ -28511,12 +29090,18 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
if (SetCC.getOpcode() != ISD::SETCC)
return false;
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
- if (CC != ISD::SETGT)
+ if (CC != ISD::SETGT && CC != ISD::SETLT)
return false;
SDValue SelectOp1 = Select->getOperand(1);
SDValue SelectOp2 = Select->getOperand(2);
+ // The following instructions assume SelectOp1 is the subtraction operand
+ // and SelectOp2 is the negation operand.
+ // In the case of SETLT this is the other way around.
+ if (CC == ISD::SETLT)
+ std::swap(SelectOp1, SelectOp2);
+
// The second operand of the select should be the negation of the first
// operand, which is implemented as 0 - SelectOp1.
if (!(SelectOp2.getOpcode() == ISD::SUB &&
@@ -28529,8 +29114,17 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
if (SetCC.getOperand(0) != SelectOp1)
return false;
- // The second operand of the comparison can be either -1 or 0.
- if (!(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
+  // In the SETLT case, the second operand of the comparison can be either 1 or 0.
+ APInt SplatVal;
+ if ((CC == ISD::SETLT) &&
+ !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
+ SplatVal == 1) ||
+ (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
+ return false;
+
+  // In the SETGT case, the second operand of the comparison can be either -1 or 0.
+ if ((CC == ISD::SETGT) &&
+ !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
return false;
@@ -28576,17 +29170,92 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}
+// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
+static SDValue combineHorizontalPredicateResult(SDNode *Extract,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Bail without SSE2 or with AVX512VL (which uses predicate registers).
+ if (!Subtarget.hasSSE2() || Subtarget.hasVLX())
+ return SDValue();
+
+ EVT ExtractVT = Extract->getValueType(0);
+ unsigned BitWidth = ExtractVT.getSizeInBits();
+ if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
+ ExtractVT != MVT::i8)
+ return SDValue();
+
+ // Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
+ for (ISD::NodeType Op : {ISD::OR, ISD::AND}) {
+ SDValue Match = matchBinOpReduction(Extract, Op);
+ if (!Match)
+ continue;
+
+ // EXTRACT_VECTOR_ELT can require implicit extension of the vector element
+ // which we can't support here for now.
+ if (Match.getScalarValueSizeInBits() != BitWidth)
+ continue;
+
+ // We require AVX2 for PMOVMSKB for v16i16/v32i8.
+ unsigned MatchSizeInBits = Match.getValueSizeInBits();
+ if (!(MatchSizeInBits == 128 ||
+ (MatchSizeInBits == 256 &&
+ ((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
+ return SDValue();
+
+ // Don't bother performing this for 2-element vectors.
+ if (Match.getValueType().getVectorNumElements() <= 2)
+ return SDValue();
+
+ // Check that we are extracting a reduction of all sign bits.
+ if (DAG.ComputeNumSignBits(Match) != BitWidth)
+ return SDValue();
+
+ // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
+ MVT MaskVT;
+ if (64 == BitWidth || 32 == BitWidth)
+ MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
+ MatchSizeInBits / BitWidth);
+ else
+ MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
+
+ APInt CompareBits;
+ ISD::CondCode CondCode;
+ if (Op == ISD::OR) {
+ // any_of -> MOVMSK != 0
+ CompareBits = APInt::getNullValue(32);
+ CondCode = ISD::CondCode::SETNE;
+ } else {
+ // all_of -> MOVMSK == ((1 << NumElts) - 1)
+ CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
+ CondCode = ISD::CondCode::SETEQ;
+ }
+
+ // Perform the select as i32/i64 and then truncate to avoid partial register
+ // stalls.
+ unsigned ResWidth = std::max(BitWidth, 32u);
+ EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
+ SDLoc DL(Extract);
+ SDValue Zero = DAG.getConstant(0, DL, ResVT);
+ SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
+ SDValue Res = DAG.getBitcast(MaskVT, Match);
+ Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
+ Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
+ Ones, Zero, CondCode);
+ return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
+ }
+
+ return SDValue();
+}
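As an illustration of the reduction this combine targets, the following standalone C++ sketch (not part of the patch; SSE2 intrinsics assumed) models any_of/all_of over a 16 x i8 predicate vector: MOVMSK gathers the sign bit of every lane, so any_of becomes mask != 0 and all_of becomes mask == 0xFFFF.

// Standalone illustration (not part of this patch): model the any_of/all_of
// reduction that combineHorizontalPredicateResult replaces with MOVMSK.
#include <immintrin.h>
#include <cstdio>

int main() {
  // A per-lane predicate vector: each i8 lane is 0x00 (false) or 0xFF (true),
  // exactly the all-zeros/all-ones form PCMP* produces.
  __m128i a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
  __m128i b = _mm_set1_epi8(8);
  __m128i pred = _mm_cmpgt_epi8(a, b); // lanes > 8 become 0xFF, others 0x00

  // MOVMSK packs the sign bit of each of the 16 lanes into an i32.
  int mask = _mm_movemask_epi8(pred);

  bool any_of = (mask != 0);      // OR-reduction: at least one lane is true.
  bool all_of = (mask == 0xFFFF); // AND-reduction: every lane is true.

  std::printf("mask=0x%04x any_of=%d all_of=%d\n", mask, any_of, all_of);
  return 0;
}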
+
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// PSADBW is only supported on SSE2 and up.
if (!Subtarget.hasSSE2())
return SDValue();
- // Verify the type we're extracting from is appropriate
- // TODO: There's nothing special about i32, any integer type above i16 should
- // work just as well.
+ // Verify that the type we're extracting from has elements wider than i16.
EVT VT = Extract->getOperand(0).getValueType();
- if (!VT.isSimple() || !(VT.getVectorElementType() == MVT::i32))
+ if (!VT.isSimple() || !(VT.getVectorElementType().getSizeInBits() > 16))
return SDValue();
unsigned RegSize = 128;
@@ -28595,15 +29264,28 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
else if (Subtarget.hasAVX2())
RegSize = 256;
- // We only handle v16i32 for SSE2 / v32i32 for AVX2 / v64i32 for AVX512.
+ // We handle up to v16i* for SSE2 / v32i* for AVX2 / v64i* for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
- if (VT.getSizeInBits() / 4 > RegSize)
+ if (RegSize / VT.getVectorNumElements() < 8)
return SDValue();
// Match shuffle + add pyramid.
SDValue Root = matchBinOpReduction(Extract, ISD::ADD);
+ // The operand is expected to be zero extended from i8
+ // (verified in detectZextAbsDiff).
+ // In order to convert to i64 and above, an additional any/zero/sign
+ // extend is expected.
+ // A zero extend from 32 bits has no mathematical effect on the result,
+ // and the sign extend behaves like a zero extend here
+ // (it extends the sign bit, which is known to be zero).
+ // So it is correct to skip the sign/zero/any extend instruction.
+ if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
+ Root.getOpcode() == ISD::ZERO_EXTEND ||
+ Root.getOpcode() == ISD::ANY_EXTEND))
+ Root = Root.getOperand(0);
+
// If there was a match, we want Root to be a select that is the root of an
// abs-diff pattern.
if (!Root || (Root.getOpcode() != ISD::VSELECT))
@@ -28614,7 +29296,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
if (!detectZextAbsDiff(Root, Zext0, Zext1))
return SDValue();
- // Create the SAD instruction
+ // Create the SAD instruction.
SDLoc DL(Extract);
SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL);
@@ -28636,13 +29318,103 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
}
}
- // Return the lowest i32.
- MVT ResVT = MVT::getVectorVT(MVT::i32, SadVT.getSizeInBits() / 32);
+ MVT Type = Extract->getSimpleValueType(0);
+ unsigned TypeSizeInBits = Type.getSizeInBits();
+ // Return the lowest TypeSizeInBits bits.
+ MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits);
SAD = DAG.getNode(ISD::BITCAST, DL, ResVT, SAD);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SAD,
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD,
Extract->getOperand(1));
}
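To make the extend-skipping argument above concrete, here is a small standalone sketch (illustrative only, not part of the patch) of what PSADBW computes per 8-byte group; the per-group sum is at most 8 * 255 = 2040, so widening the i32 result to i64 cannot change its value.

// Standalone sketch (not part of this patch): a scalar model of PSADBW and of
// why the extend above the reduction can be ignored.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Sum of absolute differences over one 8-byte group, as PSADBW does per lane.
static uint32_t sad8(const uint8_t *A, const uint8_t *B) {
  uint32_t Sum = 0;
  for (int i = 0; i < 8; ++i)
    Sum += static_cast<uint32_t>(std::abs(int(A[i]) - int(B[i])));
  return Sum;
}

int main() {
  uint8_t A[8] = {10, 200, 30, 40, 0, 255, 7, 9};
  uint8_t B[8] = {12, 100, 35, 40, 5, 0, 9, 1};
  uint32_t Sad = sad8(A, B);
  // The extend is a no-op on the value: the result already fits in 11 bits.
  uint64_t Widened = static_cast<uint64_t>(Sad);
  std::printf("sad=%u widened=%llu\n", Sad, (unsigned long long)Widened);
  return 0;
}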
+// Attempt to peek through a target shuffle and extract the scalar from the
+// source.
+static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+
+ // Don't attempt this for boolean mask vectors or unknown extraction indices.
+ if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ // Resolve the target shuffle inputs and mask.
+ SmallVector<int, 16> Mask;
+ SmallVector<SDValue, 2> Ops;
+ if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask))
+ return SDValue();
+
+ // Attempt to narrow/widen the shuffle mask to the correct size.
+ if (Mask.size() != NumSrcElts) {
+ if ((NumSrcElts % Mask.size()) == 0) {
+ SmallVector<int, 16> ScaledMask;
+ int Scale = NumSrcElts / Mask.size();
+ scaleShuffleMask(Scale, Mask, ScaledMask);
+ Mask = std::move(ScaledMask);
+ } else if ((Mask.size() % NumSrcElts) == 0) {
+ SmallVector<int, 16> WidenedMask;
+ while (Mask.size() > NumSrcElts &&
+ canWidenShuffleElements(Mask, WidenedMask))
+ Mask = std::move(WidenedMask);
+ // TODO - investigate support for wider shuffle masks with known upper
+ // undef/zero elements for implicit zero-extension.
+ }
+ }
+
+ // Check if narrowing/widening failed.
+ if (Mask.size() != NumSrcElts)
+ return SDValue();
+
+ int SrcIdx = Mask[N->getConstantOperandVal(1)];
+ SDLoc dl(N);
+
+ // If the shuffle source element is undef/zero then we can just accept it.
+ if (SrcIdx == SM_SentinelUndef)
+ return DAG.getUNDEF(VT);
+
+ if (SrcIdx == SM_SentinelZero)
+ return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)
+ : DAG.getConstant(0, dl, VT);
+
+ SDValue SrcOp = Ops[SrcIdx / Mask.size()];
+ SrcOp = DAG.getBitcast(SrcVT, SrcOp);
+ SrcIdx = SrcIdx % Mask.size();
+
+ // We can only extract other elements from 128-bit vectors and in certain
+ // circumstances, depending on SSE-level.
+ // TODO: Investigate using extract_subvector for larger vectors.
+ // TODO: Investigate float/double extraction if it will be just stored.
+ if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) &&
+ ((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
+ assert(SrcSVT == VT && "Unexpected extraction type");
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp,
+ DAG.getIntPtrConstant(SrcIdx, dl));
+ }
+
+ if ((SrcVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
+ (SrcVT == MVT::v16i8 && Subtarget.hasSSE41())) {
+ assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() &&
+ "Unexpected extraction type");
+ unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
+ SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
+ DAG.getIntPtrConstant(SrcIdx, dl));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, ExtOp,
+ DAG.getValueType(SrcSVT));
+ return DAG.getZExtOrTrunc(Assert, dl, VT);
+ }
+
+ return SDValue();
+}
+
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
@@ -28653,14 +29425,29 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
+ if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
+ return NewOp;
+
SDValue InputVector = N->getOperand(0);
+ SDValue EltIdx = N->getOperand(1);
+
+ EVT SrcVT = InputVector.getValueType();
+ EVT VT = N->getValueType(0);
SDLoc dl(InputVector);
+
+ // Detect mmx extraction of all bits as an i64. It works better as a bitcast.
+ if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
+ VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
+ SDValue MMXSrc = InputVector.getOperand(0);
+
+ // The bitcast source is a direct mmx result.
+ if (MMXSrc.getValueType() == MVT::x86mmx)
+ return DAG.getBitcast(VT, InputVector);
+ }
+
// Detect mmx to i32 conversion through a v2i32 elt extract.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
- N->getValueType(0) == MVT::i32 &&
- InputVector.getValueType() == MVT::v2i32 &&
- isa<ConstantSDNode>(N->getOperand(1)) &&
- N->getConstantOperandVal(1) == 0) {
+ VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
@@ -28668,15 +29455,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
}
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::i1 && isa<ConstantSDNode>(N->getOperand(1)) &&
- InputVector.getOpcode() == ISD::BITCAST &&
+ if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST &&
+ isa<ConstantSDNode>(EltIdx) &&
isa<ConstantSDNode>(InputVector.getOperand(0))) {
- uint64_t ExtractedElt =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- uint64_t InputValue =
- cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
+ uint64_t ExtractedElt = N->getConstantOperandVal(1);
+ uint64_t InputValue = InputVector.getConstantOperandVal(0);
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
@@ -28687,9 +29470,13 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
return SAD;
+ // Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
+ if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
+ return Cmp;
+
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
- if (InputVector.getValueType() != MVT::v4i32)
+ if (SrcVT != MVT::v4i32)
return SDValue();
// Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
@@ -28717,9 +29504,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Record which element was extracted.
- ExtractedElements |=
- 1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
-
+ ExtractedElements |= 1 << Extract->getConstantOperandVal(1);
Uses.push_back(Extract);
}
@@ -28752,11 +29537,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt));
} else {
// Store the value to a temporary stack slot.
- SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue StackPtr = DAG.CreateStackTemporary(SrcVT);
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
MachinePointerInfo());
- EVT ElementType = InputVector.getValueType().getVectorElementType();
+ EVT ElementType = SrcVT.getVectorElementType();
unsigned EltSize = ElementType.getSizeInBits() / 8;
// Replace each use (extract) with a load of the appropriate element.
@@ -28779,8 +29564,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
- SDValue Idx = Extract->getOperand(1);
- uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ uint64_t IdxVal = Extract->getConstantOperandVal(1);
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
}
@@ -28788,6 +29572,16 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// TODO - merge with combineExtractVectorElt once it can handle the implicit
+// zero-extension of X86ISD::PINSRW/X86ISD::PINSRB in:
+// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
+// combineBasicSADPattern.
+static SDValue combineExtractVectorElt_SSE(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ return combineExtractWithShuffle(N, DAG, DCI, Subtarget);
+}
+
/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
static SDValue
@@ -28812,12 +29606,11 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
// This situation only applies to avx512.
if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() &&
CondVT.getVectorElementType() == MVT::i1) {
- //Invert the cond to not(cond) : xor(op,allones)=not(op)
- SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(APInt::getAllOnesValue(CondVT.getScalarSizeInBits()),
- DL, CondVT));
- //Vselect cond, op1, op2 = Vselect not(cond), op2, op1
- return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+ // Invert the cond to not(cond) : xor(op,allones)=not(op)
+ SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getAllOnesConstant(DL, CondVT));
+ // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
+ return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
}
// To use the condition operand as a bitwise mask, it must have elements that
@@ -28920,18 +29713,6 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(ShAmt, DL, MVT::i8));
}
- // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
- if (FalseC->getAPIntValue() + 1 == TrueC->getAPIntValue()) {
- if (NeedsCondInvert) // Invert the condition if needed.
- Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, DL, Cond.getValueType()));
-
- // Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
- return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
- SDValue(FalseC, 0));
- }
-
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
@@ -29049,7 +29830,7 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
- if (VT.isInteger() != OpEltVT.isInteger())
+ if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
@@ -29063,7 +29844,7 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1,
- DAG.getConstant(Imm, DL, MVT::i8)));
+ DAG.getIntPtrConstant(Imm, DL)));
return true;
}
case ISD::EXTRACT_SUBVECTOR: {
@@ -29072,7 +29853,7 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
- if (VT.isInteger() != OpEltVT.isInteger())
+ if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
@@ -29084,7 +29865,23 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0,
- DAG.getConstant(Imm, DL, MVT::i8)));
+ DAG.getIntPtrConstant(Imm, DL)));
+ return true;
+ }
+ case X86ISD::SUBV_BROADCAST: {
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (EltSize != 32 && EltSize != 64)
+ return false;
+ // Only change element size, not type.
+ if (VT.isInteger() != Op.getSimpleValueType().isInteger())
+ return false;
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = MVT::getVectorVT(EltVT,
+ Op0.getSimpleValueType().getSizeInBits() / EltSize);
+ Op0 = DAG.getBitcast(Op0VT, Op.getOperand(0));
+ DCI.AddToWorklist(Op0.getNode());
+ DCI.CombineTo(OrigOp.getNode(),
+ DAG.getNode(Opcode, DL, VT, Op0));
return true;
}
}
@@ -29370,8 +30167,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// If this is a *dynamic* select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the
- // condition so that the blends can use the high bit of each element and use
- // SimplifyDemandedBits to simplify the condition operand.
+ // condition so that blends can use the high (sign) bit of each element and
+ // use SimplifyDemandedBits to simplify the condition operand.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
@@ -29406,49 +30203,45 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
- APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
-
+ APInt DemandedMask(APInt::getSignBit(BitWidth));
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
TLO)) {
- // If we changed the computation somewhere in the DAG, this change
- // will affect all users of Cond.
- // Make sure it is fine and update all the nodes so that we do not
- // use the generic VSELECT anymore. Otherwise, we may perform
- // wrong optimizations as we messed up with the actual expectation
+ // If we changed the computation somewhere in the DAG, this change will
+ // affect all users of Cond. Make sure it is fine and update all the nodes
+ // so that we do not use the generic VSELECT anymore. Otherwise, we may
+ // perform wrong optimizations as we messed with the actual expectation
// for the vector boolean values.
if (Cond != TLO.Old) {
- // Check all uses of that condition operand to check whether it will be
- // consumed by non-BLEND instructions, which may depend on all bits are
- // set properly.
- for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
- I != E; ++I)
- if (I->getOpcode() != ISD::VSELECT)
- // TODO: Add other opcodes eventually lowered into BLEND.
+ // Check all uses of the condition operand to check whether it will be
+ // consumed by non-BLEND instructions. Those may require that all bits
+ // are set properly.
+ for (SDNode *U : Cond->uses()) {
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ if (U->getOpcode() != ISD::VSELECT)
return SDValue();
+ }
- // Update all the users of the condition, before committing the change,
- // so that the VSELECT optimizations that expect the correct vector
- // boolean value will not be triggered.
- for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
- I != E; ++I)
- DAG.ReplaceAllUsesOfValueWith(
- SDValue(*I, 0),
- DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0),
- Cond, I->getOperand(1), I->getOperand(2)));
+ // Update all users of the condition before committing the change, so
+ // that the VSELECT optimizations that expect the correct vector boolean
+ // value will not be triggered.
+ for (SDNode *U : Cond->uses()) {
+ SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
+ U->getValueType(0), Cond, U->getOperand(1),
+ U->getOperand(2));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
+ }
DCI.CommitTargetLoweringOpt(TLO);
return SDValue();
}
- // At this point, only Cond is changed. Change the condition
- // just for N to keep the opportunity to optimize all other
- // users their own way.
- DAG.ReplaceAllUsesOfValueWith(
- SDValue(N, 0),
- DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0),
- TLO.New, N->getOperand(1), N->getOperand(2)));
+ // Only Cond (rather than other nodes in the computation chain) was
+ // changed. Change the condition just for N to keep the opportunity to
+ // optimize all other users their own way.
+ SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB);
return SDValue();
}
}
@@ -29456,7 +30249,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Look for vselects with LHS/RHS being bitcasted from an operation that
// can be executed on another type. Push the bitcast to the inputs of
// the operation. This exposes opportunities for using masking instructions.
- if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalizeOps() &&
+ if (N->getOpcode() == ISD::VSELECT && DCI.isAfterLegalizeVectorOps() &&
CondVT.getVectorElementType() == MVT::i1) {
if (combineBitcastForMaskedOp(LHS, DAG, DCI))
return SDValue(N, 0);
@@ -30208,22 +31001,37 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
}
if (!NewMul) {
- assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
- && "Both cases that could cause potential overflows should have "
- "already been handled.");
- if (isPowerOf2_64(MulAmt - 1))
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt - 1), DL,
- MVT::i8)));
-
- else if (isPowerOf2_64(MulAmt + 1))
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
- N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt + 1),
- DL, MVT::i8)), N->getOperand(0));
+ assert(MulAmt != 0 &&
+ MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ "Both cases that could cause potential overflows should have "
+ "already been handled.");
+ int64_t SignMulAmt = C->getSExtValue();
+ if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) &&
+ (SignMulAmt != -INT64_MAX)) {
+ int NumSign = SignMulAmt > 0 ? 1 : -1;
+ bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1);
+ bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1);
+ if (IsPowerOf2_64PlusOne) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ NewMul = DAG.getNode(
+ ISD::ADD, DL, VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL,
+ MVT::i8)));
+ } else if (IsPowerOf2_64MinusOne) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ NewMul = DAG.getNode(
+ ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL,
+ MVT::i8)),
+ N->getOperand(0));
+ }
+ // To negate, subtract the result from zero.
+ if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1)
+ NewMul =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+ }
}
if (NewMul)
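The scalar identities behind the new signed multiply-by-constant handling can be checked directly; the sketch below (illustrative only, not part of the patch) verifies that mul x, 2^N+1 == (x << N) + x, mul x, 2^N-1 == (x << N) - x, and that a negative multiplier is handled by negating the positive decomposition.

// Standalone sketch (not part of this patch): the scalar identities behind the
// signed multiply-by-constant decomposition. Shifts are done in uint64_t to
// keep them well defined for negative inputs.
#include <cstdint>
#include <cstdio>

static int64_t shlAdd(int64_t X, unsigned N) {
  return (int64_t)(((uint64_t)X << N) + (uint64_t)X); // x * (2^N + 1)
}
static int64_t shlSub(int64_t X, unsigned N) {
  return (int64_t)(((uint64_t)X << N) - (uint64_t)X); // x * (2^N - 1)
}

int main() {
  for (int64_t X : {-123456789LL, -1LL, 0LL, 7LL, 987654321LL}) {
    bool OK = shlAdd(X, 3) == X * 9 &&      // mul x, 9
              shlSub(X, 3) == X * 7 &&      // mul x, 7
              0 - shlAdd(X, 3) == X * -9 && // mul x, -9: negate the result
              0 - shlSub(X, 3) == X * -7;   // mul x, -7
    std::printf("x=%lld ok=%d\n", (long long)X, OK);
  }
  return 0;
}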
@@ -30396,42 +31204,95 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- assert((X86ISD::VSHLI == N->getOpcode() || X86ISD::VSRLI == N->getOpcode()) &&
- "Unexpected opcode");
+static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ unsigned Opcode = N->getOpcode();
+ assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode ||
+ X86ISD::VSRLI == Opcode) &&
+ "Unexpected shift opcode");
+ bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
-
- // This fails for mask register (vXi1) shifts.
- if ((NumBitsPerElt % 8) != 0)
- return SDValue();
+ assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
+ "Unexpected value type");
// Out of range logical bit shifts are guaranteed to be zero.
- APInt ShiftVal = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
- if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt))
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ // Out of range arithmetic bit shifts splat the sign bit.
+ APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
+ if (LogicalShift)
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ else
+ ShiftVal = NumBitsPerElt - 1;
+ }
// Shift N0 by zero -> N0.
if (!ShiftVal)
- return N->getOperand(0);
+ return N0;
// Shift zero -> zero.
- if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
+ // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
+ // TODO - support other sra opcodes as needed.
+ if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
+ N0.getOpcode() == X86ISD::VSRAI)
+ return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
+
// We can decode 'whole byte' logical bit shifts as shuffles.
- if ((ShiftVal.getZExtValue() % 8) == 0) {
+ if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
+ // Constant Folding.
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (N->isOnlyUserOf(N0.getNode()) &&
+ getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
+ assert(EltBits.size() == VT.getVectorNumElements() &&
+ "Unexpected shift value type");
+ unsigned ShiftImm = ShiftVal.getZExtValue();
+ for (APInt &Elt : EltBits) {
+ if (X86ISD::VSHLI == Opcode)
+ Elt = Elt.shl(ShiftImm);
+ else if (X86ISD::VSRAI == Opcode)
+ Elt = Elt.ashr(ShiftImm);
+ else
+ Elt = Elt.lshr(ShiftImm);
+ }
+ return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
+ }
+
+ return SDValue();
+}
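A scalar model of the immediate-shift folds above (illustrative only, not part of the patch; it assumes the usual arithmetic right shift for signed values, as on x86): out-of-range arithmetic shifts clamp to NumBits - 1 and splat the sign bit, and (VSRLI (VSRAI X, Y), 31) keeps only the sign bit, which VSRAI never changes.

// Standalone sketch (not part of this patch): scalar analogues of the
// immediate-shift folds, on a single i32 element.
#include <cstdint>
#include <cstdio>

int main() {
  for (int32_t X : {INT32_MIN, -5, 0, 5, INT32_MAX}) {
    uint32_t U = (uint32_t)X;
    uint32_t Lsr31 = U >> 31; // sign bit as 0 or 1
    int32_t Asr31 = X >> 31;  // 0 or -1: out-of-range VSRAI splats the sign bit
    // (VSRLI (VSRAI X, Y), 31) == (VSRLI X, 31): the inner arithmetic shift
    // never changes the sign bit, and the final logical shift keeps only it.
    for (unsigned Y = 0; Y < 31; ++Y) {
      uint32_t Lhs = (uint32_t)(X >> Y) >> 31;
      if (Lhs != Lsr31)
        std::printf("mismatch at x=%d y=%u\n", X, Y);
    }
    std::printf("x=%d lsr31=%u asr31=%d\n", X, Lsr31, Asr31);
  }
  return 0;
}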
+
+static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert(
+ ((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) ||
+ (N->getOpcode() == X86ISD::PINSRW &&
+ N->getValueType(0) == MVT::v8i16)) &&
+ "Unexpected vector insertion");
+
+ // Attempt to combine PINSRB/PINSRW patterns to a shuffle.
+ SDValue Op(N, 0);
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
+ /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+ DCI, Subtarget);
return SDValue();
}
@@ -30550,33 +31411,15 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
return SDValue();
- // Canonicalize XOR to the left.
- if (N1.getOpcode() == ISD::XOR)
- std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
- if (N0.getOpcode() != ISD::XOR)
- return SDValue();
-
- SDValue N00 = N0->getOperand(0);
- SDValue N01 = N0->getOperand(1);
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
- N01 = peekThroughBitcasts(N01);
-
- // Either match a direct AllOnes for 128, 256, and 512-bit vectors, or an
- // insert_subvector building a 256-bit AllOnes vector.
- if (!ISD::isBuildVectorAllOnes(N01.getNode())) {
- if (!VT.is256BitVector() || N01->getOpcode() != ISD::INSERT_SUBVECTOR)
- return SDValue();
-
- SDValue V1 = N01->getOperand(0);
- SDValue V2 = N01->getOperand(1);
- if (V1.getOpcode() != ISD::INSERT_SUBVECTOR ||
- !V1.getOperand(0).isUndef() ||
- !ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) ||
- !ISD::isBuildVectorAllOnes(V2.getNode()))
- return SDValue();
- }
- return DAG.getNode(X86ISD::ANDNP, DL, VT, N00, N1);
+ return SDValue();
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
@@ -30696,38 +31539,34 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// If this is a PCMPEQ or PCMPGT result that is bitwise-anded with 1 (this is
-/// the x86 lowering of a SETCC + ZEXT), replace the 'and' with a shift-right to
-/// eliminate loading the vector constant mask value. This relies on the fact
-/// that a PCMP always creates an all-ones or all-zeros bitmask per element.
-static SDValue combinePCMPAnd1(SDNode *N, SelectionDAG &DAG) {
+/// If this is a zero/all-bits result that is bitwise-anded with a low-bits
+/// mask (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
+/// with a shift-right to eliminate loading the vector constant mask value.
+static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
+ EVT VT0 = Op0.getValueType();
+ EVT VT1 = Op1.getValueType();
- // TODO: Use AssertSext to mark any nodes that have the property of producing
- // all-ones or all-zeros. Then check for that node rather than particular
- // opcodes.
- if (Op0.getOpcode() != X86ISD::PCMPEQ && Op0.getOpcode() != X86ISD::PCMPGT)
+ if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
return SDValue();
- // The existence of the PCMP node guarantees that we have the required SSE2 or
- // AVX2 for a shift of this vector type, but there is no vector shift by
- // immediate for a vector with byte elements (PSRLB). 512-bit vectors use the
- // masked compare nodes, so they should not make it here.
- EVT VT0 = Op0.getValueType();
- EVT VT1 = Op1.getValueType();
- unsigned EltBitWidth = VT0.getScalarSizeInBits();
- if (VT0 != VT1 || EltBitWidth == 8)
+ APInt SplatVal;
+ if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
+ !SplatVal.isMask())
return SDValue();
- assert(VT0.getSizeInBits() == 128 || VT0.getSizeInBits() == 256);
+ if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
+ return SDValue();
- APInt SplatVal;
- if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || SplatVal != 1)
+ unsigned EltBitWidth = VT0.getScalarSizeInBits();
+ if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
- SDValue ShAmt = DAG.getConstant(EltBitWidth - 1, DL, MVT::i8);
+ unsigned ShiftVal = SplatVal.countTrailingOnes();
+ SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
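The equivalence this combine relies on can be stated per element: when a value is known to be 0 or all-ones (every bit is a copy of the sign bit), AND with a mask of k low ones equals a logical shift right by EltBits - k. A standalone check (illustrative only, not part of the patch):

// Standalone sketch (not part of this patch): AND-with-low-bits-mask equals a
// logical shift right when every element is 0 or all-ones.
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t V : {0u, 0xFFFFFFFFu}) { // the only possible element values
    for (unsigned K = 1; K <= 32; ++K) {
      uint32_t Mask = (K == 32) ? 0xFFFFFFFFu : ((1u << K) - 1);
      uint32_t ByAnd = V & Mask;
      uint32_t ByShift = (K == 32) ? V : (V >> (32 - K));
      if (ByAnd != ByShift)
        std::printf("mismatch V=%u K=%u\n", V, K);
    }
  }
  std::printf("AND with a k-bit mask == logical shift right by 32-k\n");
  return 0;
}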
@@ -30747,7 +31586,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
return R;
- if (SDValue ShiftRight = combinePCMPAnd1(N, DAG))
+ if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
return ShiftRight;
EVT VT = N->getValueType(0);
@@ -30760,7 +31599,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
@@ -30969,7 +31808,7 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
N->getOperand(1).getOpcode() == X86ISD::CMP &&
- N->getOperand(1).getConstantOperandVal(1) == 0 &&
+ isNullConstant(N->getOperand(1).getOperand(1)) &&
N->getOperand(1).getValueType().bitsGE(MVT::i32);
};
@@ -31272,6 +32111,74 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
+/// Check if truncation with saturation from type \p SrcVT to \p DstVT
+/// is valid for the given \p Subtarget.
+static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return false;
+
+ // FIXME: Scalar type may be supported if we move it to a vector register.
+ if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
+ return false;
+
+ EVT SrcElVT = SrcVT.getScalarType();
+ EVT DstElVT = DstVT.getScalarType();
+ if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
+ return false;
+ if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
+ return false;
+ if (SrcVT.is512BitVector() || Subtarget.hasVLX())
+ return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
+ return false;
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectUSatPattern(SDValue In, EVT VT) {
+ if (In.getOpcode() != ISD::UMIN)
+ return SDValue();
+
+ // Saturation with truncation. We truncate from InVT to VT.
+ assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
+ "Unexpected types for truncate operation");
+
+ APInt C;
+ if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
+ // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
+ // the element size of the destination type.
+ return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) :
+ SDValue();
+ }
+ return SDValue();
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// The types should allow using the VPMOVUS* instructions on AVX512.
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
+ const X86Subtarget &Subtarget) {
+ if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+ return SDValue();
+ return detectUSatPattern(In, VT);
+}
+
+static SDValue
+combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
+ return SDValue();
+ if (auto USatVal = detectUSatPattern(In, VT))
+ if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+ return SDValue();
+}
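In scalar form the pattern detectUSatPattern matches is simply a clamp followed by a truncate; the sketch below (illustrative only, not part of the patch) shows the u32 -> u8 case that the VPMOVUS* instructions implement.

// Standalone sketch (not part of this patch): the scalar form of
// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type) for u32 -> u8.
#include <algorithm>
#include <cstdint>
#include <cstdio>

static uint8_t truncUSat8(uint32_t X) {
  // Clamp to the destination's unsigned maximum, then truncate.
  return static_cast<uint8_t>(std::min<uint32_t>(X, 0xFFu));
}

int main() {
  for (uint32_t X : {0u, 200u, 255u, 256u, 70000u, 0xFFFFFFFFu})
    std::printf("%u -> %u\n", X, (unsigned)truncUSat8(X));
  return 0;
}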
+
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
@@ -31664,7 +32571,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
Mld->getBasePtr(), NewMask, WideSrc0,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
- SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
+ SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
@@ -31838,6 +32745,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
+ if (SDValue Val =
+ detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -32198,13 +33111,30 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
- auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
- // TODO: Add extra cases where we can truncate both inputs for the
- // cost of one (or none).
- // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
+ auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) {
+ unsigned TruncSizeInBits = VT.getScalarSizeInBits();
+
+ // Repeated operand, so we are only trading one output truncation for
+ // one input truncation.
if (Op0 == Op1)
return true;
+ // See if either operand has been extended from a smaller/equal size to
+ // the truncation size, allowing a truncation to combine with the extend.
+ unsigned Opcode0 = Op0.getOpcode();
+ if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND ||
+ Opcode0 == ISD::ZERO_EXTEND) &&
+ Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
+ return true;
+
+ unsigned Opcode1 = Op1.getOpcode();
+ if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND ||
+ Opcode1 == ISD::ZERO_EXTEND) &&
+ Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
+ return true;
+
+ // See if either operand is a single use constant which can be constant
+ // folded.
SDValue BC0 = peekThroughOneUseBitcasts(Op0);
SDValue BC1 = peekThroughOneUseBitcasts(Op1);
return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
@@ -32236,7 +33166,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
- IsRepeatedOpOrOneUseConstant(Op0, Op1))
+ IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -32252,7 +33182,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
- IsRepeatedOpOrOneUseConstant(Op0, Op1))
+ IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -32458,6 +33388,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try to combine truncation with unsigned saturation.
+ if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
+ return Val;
+
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
@@ -32804,6 +33738,34 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax);
}
+/// Do target-specific dag combines on X86ISD::ANDNP nodes.
+static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ // ANDNP(0, x) -> x
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return N->getOperand(1);
+
+ // ANDNP(x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
+ return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
+
+ EVT VT = N->getValueType(0);
+
+ // Attempt to recursively combine a bitmask ANDNP with shuffles.
+ if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ SDValue Op(N, 0);
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
+ /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+ DCI, Subtarget))
+ return SDValue(); // This routine will use CombineTo to replace N.
+ }
+
+ return SDValue();
+}
+
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// BT ignores high bits in the bit index operand.
@@ -33065,13 +34027,22 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps()) {
if (InVT == MVT::i1) {
SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue AllOnes =
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero);
}
return SDValue();
}
+ if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
+ isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
+ // Inverting and sign-extending a boolean is the same as zero-extending it
+ // and subtracting 1, because 0 becomes -1 and 1 becomes 0. The subtract is
+ // efficiently lowered with an LEA or a DEC. This is the same as:
+ // select Bool, 0, -1.
+ // sext (xor Bool, -1) --> sub (zext Bool), 1
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
+ }
+
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
@@ -33212,8 +34183,47 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Optimize x == -y --> x+y == 0
-/// x != -y --> x+y != 0
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+ // We're looking for an oversized integer equality comparison, but ignore a
+ // comparison with zero because that gets special treatment in EmitTest().
+ SDValue X = SetCC->getOperand(0);
+ SDValue Y = SetCC->getOperand(1);
+ EVT OpVT = X.getValueType();
+ unsigned OpSize = OpVT.getSizeInBits();
+ if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
+ return SDValue();
+
+ // TODO: Use PXOR + PTEST for SSE4.1 or later?
+ // TODO: Add support for AVX-512.
+ EVT VT = SetCC->getValueType(0);
+ SDLoc DL(SetCC);
+ if ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX2())) {
+ EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
+ SDValue VecX = DAG.getBitcast(VecVT, X);
+ SDValue VecY = DAG.getBitcast(VecVT, Y);
+
+ // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
+ // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
+ // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
+ // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
+ // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
+ SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
+ SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
+ SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
+ MVT::i32);
+ return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
+ }
+
+ return SDValue();
+}
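For reference, the SSE2 sequence chosen above corresponds to the following standalone 16-byte equality helper (illustrative only, not part of the patch): a per-byte PCMPEQB followed by PMOVMSKB, with equality exactly when all 16 mask bits are set.

// Standalone sketch (not part of this patch): 128-bit equality via
// PCMPEQB + PMOVMSKB, mirroring the setcc lowering above.
#include <immintrin.h>
#include <cstdio>

static bool equal16(const void *A, const void *B) {
  __m128i X = _mm_loadu_si128(static_cast<const __m128i *>(A));
  __m128i Y = _mm_loadu_si128(static_cast<const __m128i *>(B));
  // Per-byte compare, then gather the 16 lane results into one mask.
  __m128i Eq = _mm_cmpeq_epi8(X, Y);
  // All 16 bytes match exactly when the mask is 0xFFFF.
  return _mm_movemask_epi8(Eq) == 0xFFFF;
}

int main() {
  char A[16] = "fifteen chars!!";
  char B[16] = "fifteen chars!!";
  char C[16] = "fifteen chArs!!";
  std::printf("%d %d\n", equal16(A, B), equal16(A, C));
  return 0;
}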
+
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
@@ -33222,21 +34232,27 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
- if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
- LHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
+ if (CC == ISD::SETNE || CC == ISD::SETEQ) {
+ EVT OpVT = LHS.getValueType();
+ // 0-x == y --> x+y == 0
+ // 0-x != y --> x+y != 0
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+ LHS.hasOneUse()) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, RHS, LHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
- if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
- if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
- RHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
+ // x == 0-y --> x+y == 0
+ // x != 0-y --> x+y != 0
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
+ RHS.hasOneUse()) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
+ if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
+ return V;
+ }
+
if (VT.getScalarType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
bool IsSEXT0 =
@@ -33293,56 +34309,13 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Helper function of performSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
-// an all-ones bit which is more useful than 0/1 in some cases.
-static SDValue MaterializeSETB(const SDLoc &DL, SDValue EFLAGS,
- SelectionDAG &DAG, MVT VT) {
- if (VT == MVT::i8)
- return DAG.getNode(ISD::AND, DL, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- EFLAGS),
- DAG.getConstant(1, DL, VT));
- assert (VT == MVT::i1 && "Unexpected type for SECCC node");
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1,
- DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- EFLAGS));
-}
-
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
- if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
- // materializing "setb reg".
- //
- // Do not flip "e > c", where "c" is a constant, because Cmp instruction
- // cannot take an immediate as its first operand.
- //
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
- EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0));
- }
- }
-
- // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
- // a zext and produces an all-ones bit which is more useful than 0/1 in some
- // cases.
- if (CC == X86::COND_B)
- return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
-
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
return getSETCC(CC, Flags, DL, DAG);
@@ -33352,7 +34325,6 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
/// Optimize branch condition evaluation.
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue EFLAGS = N->getOperand(3);
@@ -33538,45 +34510,159 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// fold (add Y, (sete X, 0)) -> adc 0, Y
-/// (add Y, (setne X, 0)) -> sbb -1, Y
-/// (sub (sete X, 0), Y) -> sbb 0, Y
-/// (sub (setne X, 0), Y) -> adc -1, Y
-static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
+/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
+/// which is more useful than 0/1 in some cases.
+static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
SDLoc DL(N);
+ // "Condition code B" is also known as "the carry flag" (CF).
+ SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
+ SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
+ MVT VT = N->getSimpleValueType(0);
+ if (VT == MVT::i8)
+ return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
- // Look through ZExts.
- SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
- if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
- return SDValue();
+ assert(VT == MVT::i1 && "Unexpected type for SETCC node");
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
+}
+
+/// If this is an add or subtract where one operand is produced by a cmp+setcc,
+/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
+/// with CMP+{ADC, SBB}.
+static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
+ bool IsSub = N->getOpcode() == ISD::SUB;
+ SDValue X = N->getOperand(0);
+ SDValue Y = N->getOperand(1);
+
+ // If this is an add, canonicalize a zext operand to the RHS.
+ // TODO: Incomplete? What if both sides are zexts?
+ if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
+ Y.getOpcode() != ISD::ZERO_EXTEND)
+ std::swap(X, Y);
+
+ // Look through a one-use zext.
+ bool PeekedThroughZext = false;
+ if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
+ Y = Y.getOperand(0);
+ PeekedThroughZext = true;
+ }
- SDValue SetCC = Ext.getOperand(0);
- if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ // If this is an add, canonicalize a setcc operand to the RHS.
+ // TODO: Incomplete? What if both sides are setcc?
+ // TODO: Should we allow peeking through a zext of the other operand?
+ if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
+ Y.getOpcode() != X86ISD::SETCC)
+ std::swap(X, Y);
+
+ if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
return SDValue();
- X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
+
+ if (CC == X86::COND_B) {
+ // X + SETB Z --> X + (mask SBB Z, Z)
+ // X - SETB Z --> X - (mask SBB Z, Z)
+ // TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
+ SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
+ if (SBB.getValueSizeInBits() != VT.getSizeInBits())
+ SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
+ return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ }
+
+ if (CC == X86::COND_A) {
+ SDValue EFLAGS = Y->getOperand(1);
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+ // Do not flip "e > c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
+ EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
+ if (SBB.getValueSizeInBits() != VT.getSizeInBits())
+ SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
+ return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ }
+ }
+
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
- SDValue Cmp = SetCC.getOperand(1);
+ SDValue Cmp = Y.getOperand(1);
if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
!X86::isZeroNode(Cmp.getOperand(1)) ||
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
- SDValue CmpOp0 = Cmp.getOperand(0);
- SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
- DAG.getConstant(1, DL, CmpOp0.getValueType()));
+ // (cmp Z, 1) sets the carry flag if Z is 0.
+ SDValue Z = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z,
+ DAG.getConstant(1, DL, Z.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
+ // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
- return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
- DL, OtherVal.getValueType(), OtherVal,
- DAG.getConstant(-1ULL, DL, OtherVal.getValueType()),
- NewCmp);
- return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
- DL, OtherVal.getValueType(), OtherVal,
- DAG.getConstant(0, DL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
+ DAG.getConstant(-1ULL, DL, VT), NewCmp);
+
+ // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
+ // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
+ DAG.getConstant(0, DL, VT), NewCmp);
+}
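The CMP+ADC/SBB mappings in the comments above follow from the usual carry-flag semantics; a scalar model (illustrative only, not part of the patch) where CF is the carry out of (cmp Z, 1), i.e. CF == (Z == 0):

// Standalone sketch (not part of this patch): a scalar model of the
// CMP+ADC/SBB forms. adc X, C == X + C + CF and sbb X, C == X - C - CF.
#include <cstdint>
#include <cstdio>

int main() {
  for (uint64_t Z : {0ull, 1ull, 42ull}) {
    uint64_t X = 1000;
    uint64_t CF = (Z < 1) ? 1 : 0; // carry out of (cmp Z, 1): set iff Z == 0
    bool OK = (X + (uint64_t)-1 + CF) == X - (Z != 0) && // adc X, -1
              (X - (uint64_t)-1 - CF) == X + (Z != 0) && // sbb X, -1
              (X + 0 + CF) == X + (Z == 0) &&            // adc X, 0
              (X - 0 - CF) == X - (Z == 0);              // sbb X, 0
    std::printf("Z=%llu ok=%d\n", (unsigned long long)Z, OK);
  }
  return 0;
}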
+
+static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue MulOp = N->getOperand(0);
+ SDValue Phi = N->getOperand(1);
+
+ if (MulOp.getOpcode() != ISD::MUL)
+ std::swap(MulOp, Phi);
+ if (MulOp.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ ShrinkMode Mode;
+ if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ unsigned RegSize = 128;
+ if (Subtarget.hasBWI())
+ RegSize = 512;
+ else if (Subtarget.hasAVX2())
+ RegSize = 256;
+ unsigned VectorSize = VT.getVectorNumElements() * 16;
+ // If the vector size is less than 128, or greater than the supported RegSize,
+ // do not use PMADD.
+ if (VectorSize < 128 || VectorSize > RegSize)
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ VT.getVectorNumElements());
+ EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ VT.getVectorNumElements() / 2);
+
+ // Shrink the operands of mul.
+ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
+
+ // The MADD vector size is half of the original vector size.
+ SDValue Madd = DAG.getNode(X86ISD::VPMADDWD, DL, MAddVT, N0, N1);
+ // Fill the rest of the output with 0
+ SDValue Zero = getZeroVector(Madd.getSimpleValueType(), Subtarget, DAG, DL);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
+ return DAG.getNode(ISD::ADD, DL, VT, Concat, Phi);
}
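For context, VPMADDWD multiplies adjacent pairs of i16 elements and adds each pair of products into one i32 lane, which is why the operands are first truncated to i16 and the result vector has half as many elements. A scalar model (illustrative only, not part of the patch; the single saturating edge case of two -32768 * -32768 products is ignored):

// Standalone sketch (not part of this patch): a scalar model of VPMADDWD.
#include <cstdint>
#include <cstdio>

int main() {
  int16_t A[8] = {1, -2, 3, 4, -5, 6, 7, 8};
  int16_t B[8] = {9, 10, -11, 12, 13, 14, 15, -16};
  int32_t R[4];
  // Each i32 result lane is the sum of two adjacent i16 x i16 products.
  for (int i = 0; i < 4; ++i)
    R[i] = int32_t(A[2 * i]) * B[2 * i] + int32_t(A[2 * i + 1]) * B[2 * i + 1];
  for (int i = 0; i < 4; ++i)
    std::printf("R[%d]=%d\n", i, R[i]);
  return 0;
}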
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
@@ -33656,6 +34742,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (Flags->hasVectorReduction()) {
if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
return Sad;
+ if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))
+ return MAdd;
}
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
@@ -33667,7 +34755,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
- return OptimizeConditionalInDecrement(N, DAG);
+ return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
@@ -33700,36 +34788,44 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
- return OptimizeConditionalInDecrement(N, DAG);
+ return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
SDLoc DL(N);
unsigned Opcode = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
MVT SVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = SVT.getSizeInBits();
+
SDValue Op = N->getOperand(0);
MVT OpVT = Op.getSimpleValueType();
MVT OpEltVT = OpVT.getVectorElementType();
- unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
+ unsigned OpEltSizeInBits = OpEltVT.getSizeInBits();
+ unsigned InputBits = OpEltSizeInBits * NumElts;
// Perform any constant folding.
// FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- unsigned NumDstElts = VT.getVectorNumElements();
- SmallBitVector Undefs(NumDstElts, false);
- SmallVector<APInt, 4> Vals(NumDstElts, APInt(SVT.getSizeInBits(), 0));
- for (unsigned i = 0; i != NumDstElts; ++i) {
- SDValue OpElt = Op.getOperand(i);
- if (OpElt.getOpcode() == ISD::UNDEF) {
- Undefs[i] = true;
+ APInt UndefElts;
+ SmallVector<APInt, 64> EltBits;
+ if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) {
+ APInt Undefs(NumElts, 0);
+ SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0));
+ bool IsZEXT =
+ (Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
+ Undefs.setBit(i);
continue;
}
- APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue();
- Vals[i] = Opcode == X86ISD::VZEXT ? Cst.zextOrTrunc(SVT.getSizeInBits())
- : Cst.sextOrTrunc(SVT.getSizeInBits());
+ Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits)
+ : EltBits[i].sextOrTrunc(EltSizeInBits);
}
return getConstVector(Vals, Undefs, VT, DAG, DL);
}
@@ -33829,7 +34925,7 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
- return getOnesVector(VT, Subtarget, DAG, DL);
+ return getOnesVector(VT, DAG, DL);
if (N->getOpcode() == X86ISD::PCMPGT)
return getZeroVector(VT, Subtarget, DAG, DL);
}
@@ -33837,6 +34933,98 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDLoc dl(N);
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ MVT OpVT = N->getSimpleValueType(0);
+ MVT SubVecVT = SubVec.getSimpleValueType();
+
+ // If this is an insert of an extract, combine to a shuffle. Don't do this
+ // if the insert or extract can be represented with a subvector operation.
+ if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ SubVec.getOperand(0).getSimpleValueType() == OpVT &&
+ (IdxVal != 0 || !Vec.isUndef())) {
+ int ExtIdxVal = cast<ConstantSDNode>(SubVec.getOperand(1))->getZExtValue();
+ if (ExtIdxVal != 0) {
+ int VecNumElts = OpVT.getVectorNumElements();
+ int SubVecNumElts = SubVecVT.getVectorNumElements();
+ SmallVector<int, 64> Mask(VecNumElts);
+ // First create an identity shuffle mask.
+ for (int i = 0; i != VecNumElts; ++i)
+ Mask[i] = i;
+ // Now insert the extracted portion.
+ for (int i = 0; i != SubVecNumElts; ++i)
+ Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
+
+ return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
+ }
+ }
+
+ // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
+ // load:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr + 16), Elts/2)
+ // --> load32 addr
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr + 32), Elts/2)
+ // --> load64 addr
+ // or a 16-byte or 32-byte broadcast:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr), Elts/2)
+ // --> X86SubVBroadcast(load16 addr)
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr), Elts/2)
+ // --> X86SubVBroadcast(load32 addr)
+ if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
+ Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through bitcasts to get to the load.
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget.getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
+ SDValue Ops[] = {SubVec2, SubVec};
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
+ }
+ // If lower/upper loads are the same and the only users of the load, then
+ // lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
+ if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
+ SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode())) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
+ }
+ }
+      // If this is a subv_broadcast inserted into both halves, use a larger
+ // subv_broadcast.
+ if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
+ SubVec.getOperand(0));
+ }
+ }
+ }
+
+ return SDValue();
+}
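A standalone sketch of the mask construction used by the insert-of-extract combine above (insertExtractShuffleMask is an illustrative name). For example, VecNumElts=8, SubVecNumElts=4, IdxVal=4, ExtIdxVal=4 yields {0,1,2,3,12,13,14,15}:

#include <vector>

// Start from the identity mask over the destination vector, then redirect the
// inserted lanes to the extract source (the second shuffle operand, whose
// lanes are numbered starting at VecNumElts).
std::vector<int> insertExtractShuffleMask(int VecNumElts, int SubVecNumElts,
                                          int IdxVal, int ExtIdxVal) {
  std::vector<int> Mask(VecNumElts);
  for (int i = 0; i != VecNumElts; ++i)
    Mask[i] = i;                                       // identity
  for (int i = 0; i != SubVecNumElts; ++i)
    Mask[IdxVal + i] = VecNumElts + ExtIdxVal + i;     // inserted portion
  return Mask;
}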
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -33845,6 +35033,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
+ case X86ISD::PEXTRW:
+ case X86ISD::PEXTRB:
+ return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget);
+ case ISD::INSERT_SUBVECTOR:
+ return combineInsertSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
@@ -33870,6 +35063,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
+ case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -33884,12 +35078,18 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
- case X86ISD::SETCC: return combineX86SetCC(N, DAG, DCI, Subtarget);
- case X86ISD::BRCOND: return combineBrCond(N, DAG, DCI, Subtarget);
+ case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
+ case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::VSHLI:
- case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget);
+ case X86ISD::VSRAI:
+ case X86ISD::VSRLI:
+ return combineVectorShiftImm(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VSEXT:
case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::PALIGNR:
@@ -34717,10 +35917,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return Res;
}
- // 'A' means EAX + EDX.
+ // 'A' means [ER]AX + [ER]DX.
if (Constraint == "A") {
- Res.first = X86::EAX;
- Res.second = &X86::GR32_ADRegClass;
+ if (Subtarget.is64Bit()) {
+ Res.first = X86::RAX;
+ Res.second = &X86::GR64_ADRegClass;
+ } else if (Subtarget.is32Bit()) {
+ Res.first = X86::EAX;
+ Res.second = &X86::GR32_ADRegClass;
+ } else if (Subtarget.is16Bit()) {
+ Res.first = X86::AX;
+ Res.second = &X86::GR16_ADRegClass;
+ } else {
+ llvm_unreachable("Expecting 64, 32 or 16 bit subtarget");
+ }
return Res;
}
return Res;
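For context, the classic use of the "A" inline-asm constraint, assuming a 32-bit target and GCC-style inline asm (illustrative only, not taken from the patch):

#include <cstdint>

// On i386 the "A" constraint binds the EDX:EAX pair, so RDTSC's two 32-bit
// halves come back as one 64-bit value. On x86-64 this idiom is a known
// pitfall; use separate "=a"/"=d" outputs there instead.
static inline uint64_t rdtsc32() {
  uint64_t Ticks;
  __asm__ volatile("rdtsc" : "=A"(Ticks));
  return Ticks;
}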
@@ -34812,7 +36022,7 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
-bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on x86 is expensive. However, when aggressively optimizing
// for code size, we prefer to use a div instruction, as it is usually smaller
// than the alternative sequence.
@@ -34820,8 +36030,8 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize);
+ bool OptSize =
+ Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
return OptSize && !VT.isVector();
}
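A rough illustration of the trade-off described in the comment above; exact codegen depends on the target and optimization pipeline, so this is a sketch rather than a guarantee:

// Under MinSize (e.g. -Oz) the hook reports this scalar division as cheap and
// a single DIV is kept; otherwise the divide-by-constant is expanded into the
// longer multiply/shift sequence, which is faster but larger.
unsigned scaleDown(unsigned Bytes) { return Bytes / 10; }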
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 37f9353042b1..ab4910daca02 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -149,8 +149,7 @@ namespace llvm {
WrapperRIP,
/// Copies a 64-bit value from the low word of an XMM vector
- /// to an MMX vector. If you think this is too close to the previous
- /// mnemonic, so do I; blame Intel.
+ /// to an MMX vector.
MOVDQ2Q,
/// Copies a 32-bit value from the low word of a MMX
@@ -179,7 +178,7 @@ namespace llvm {
/// Insert the lower 16-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRW.
- PINSRW, MMX_PINSRW,
+ PINSRW,
/// Shuffle 16 8-bit values within a vector.
PSHUFB,
@@ -195,21 +194,21 @@ namespace llvm {
/// Blend where the selector is an immediate.
BLENDI,
- /// Blend where the condition has been shrunk.
- /// This is used to emphasize that the condition mask is
- /// no more valid for generic VSELECT optimizations.
+ /// Dynamic (non-constant condition) vector blend where only the sign bits
+ /// of the condition elements are used. This is used to enforce that the
+ /// condition mask is not valid for generic VSELECT optimizations.
SHRUNKBLEND,
/// Combined add and sub on an FP vector.
ADDSUB,
// FP vector ops with rounding mode.
- FADD_RND,
- FSUB_RND,
- FMUL_RND,
- FDIV_RND,
- FMAX_RND,
- FMIN_RND,
+ FADD_RND, FADDS_RND,
+ FSUB_RND, FSUBS_RND,
+ FMUL_RND, FMULS_RND,
+ FDIV_RND, FDIVS_RND,
+ FMAX_RND, FMAXS_RND,
+ FMIN_RND, FMINS_RND,
FSQRT_RND, FSQRTS_RND,
// FP vector get exponent.
@@ -239,9 +238,6 @@ namespace llvm {
FHADD,
FHSUB,
- // Integer absolute value
- ABS,
-
// Detect Conflicts Within a Vector
CONFLICT,
@@ -251,6 +247,9 @@ namespace llvm {
/// Commutative FMIN and FMAX.
FMAXC, FMINC,
+ /// Scalar intrinsic floating point max and min.
+ FMAXS, FMINS,
+
/// Floating point reciprocal-sqrt and reciprocal approximation.
/// Note that these typically require refinement
/// in order to obtain suitable precision.
@@ -320,6 +319,9 @@ namespace llvm {
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
+ // Shifts of mask registers.
+ KSHIFTL, KSHIFTR,
+
// Bit rotate by immediate
VROTLI, VROTRI,
@@ -443,8 +445,7 @@ namespace llvm {
// Broadcast subvector to vector.
SUBV_BROADCAST,
- // Insert/Extract vector element.
- VINSERT,
+ // Extract vector element.
VEXTRACT,
/// SSE4A Extraction and Insertion.
@@ -686,6 +687,9 @@ namespace llvm {
unsigned getJumpTableEncoding() const override;
bool useSoftFloat() const override;
+ void markLibCallAttributes(MachineFunction *MF, unsigned CC,
+ ArgListTy &Args) const override;
+
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i8;
}
@@ -806,8 +810,17 @@ namespace llvm {
return false;
}
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
bool hasAndNotCompare(SDValue Y) const override;
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
+ /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
+ MVT hasFastEqualityCompare(unsigned NumBits) const override;
+
/// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
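The hasFastEqualityCompare comment above refers to sequences like the following SSE2 sketch (equal16Bytes is an illustrative name, not part of the patch):

#include <immintrin.h>

// 16-byte equality check: PCMPEQB compares bytewise, PMOVMSKB collapses the
// result to a 16-bit mask, and an all-ones mask means the blocks are equal.
bool equal16Bytes(const void *A, const void *B) {
  __m128i VA = _mm_loadu_si128(static_cast<const __m128i *>(A));
  __m128i VB = _mm_loadu_si128(static_cast<const __m128i *>(B));
  return _mm_movemask_epi8(_mm_cmpeq_epi8(VA, VB)) == 0xFFFF;
}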
@@ -817,11 +830,13 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
/// Determine the number of bits in the operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
@@ -984,6 +999,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool convertSelectOfConstantsToMath() const override {
+ return true;
+ }
+
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
@@ -1035,7 +1054,7 @@ namespace llvm {
/// \brief Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
- bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool supportSwiftError() const override;
@@ -1076,7 +1095,8 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ uint32_t *RegMask) const;
SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
const SDLoc &dl, SelectionDAG &DAG,
@@ -1138,8 +1158,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_INT(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index ba1aede3c1a0..08b501ff20bf 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -38,7 +38,9 @@ multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
}
-multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, bit Commutable = 0,
+ string Ver = ""> {
+ let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
@@ -63,25 +65,25 @@ multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
(bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", 1>;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", 1>;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", 1>;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
@@ -98,6 +100,6 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
-defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
-defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", 0, "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 230d1700b8d2..c38c13bb9757 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -34,13 +34,6 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
ValueType KVT = !cast<ValueType>(!if (!eq (NumElts, 1), "i1",
"v" # NumElts # "i1"));
- // The GPR register class that can hold the write mask. Use GR8 for fewer
- // than 8 elements. Use shift-right and equal to work around the lack of
- // !lt in tablegen.
- RegisterClass MRC =
- !cast<RegisterClass>("GR" #
- !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
-
// Suffix used in the instruction mnemonic.
string Suffix = suffix;
@@ -69,6 +62,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// The corresponding memory operand, e.g. i512mem for VR512.
X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
+ // FP scalar memory operand for intrinsics - ssmem/sdmem.
+ Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
+ !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load patterns
// Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
@@ -89,6 +85,12 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
+ ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
+ !cast<ComplexPattern>("sse_load_f32"),
+ !if (!eq (EltTypeName, "f64"),
+ !cast<ComplexPattern>("sse_load_f64"),
+ ?));
+
// The corresponding float type, e.g. v16f32 for v16i32
// Note: For EltSize < 32, FloatVT is illegal and TableGen
// fails to compile, so we choose FloatVT = VT
@@ -207,7 +209,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
- let AddedComplexity = 20, isCommutable = IsKCommutable in
+ let isCommutable = IsKCommutable in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
@@ -219,7 +221,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
// A zero mask does not add any restrictions to the commute-operands transformation,
// so it is OK to use IsCommutable instead of IsKCommutable.
- let AddedComplexity = 30, isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
+ let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
@@ -250,6 +252,23 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
+// Similar to AVX512_maskable_common, but with scalar types.
+multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ SDNode Select = vselect,
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [], [], [],
+ MaskingConstraint, NoItinerary, IsCommutable,
+ IsKCommutable>;
+
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
@@ -460,7 +479,7 @@ def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
[(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
@@ -470,7 +489,7 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasVLX, HasDQI] in {
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
@@ -484,7 +503,7 @@ multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To
PatFrag vinsert_insert> {
let ExeDomain = To.ExeDomain in {
defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
- (ins To.RC:$src1, From.RC:$src2, i32u8imm:$src3),
+ (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
@@ -492,7 +511,7 @@ multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To
(iPTR imm))>, AVX512AIi8Base, EVEX_4V;
defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
- (ins To.RC:$src1, From.MemOp:$src2, i32u8imm:$src3),
+ (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
@@ -625,14 +644,14 @@ multiclass vextract_for_size<int Opcode,
// vextract_extract), we are interested only in patterns without a mask;
// the intrinsics pattern match is generated below.
defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
- (ins From.RC:$src1, i32u8imm:$idx),
+ (ins From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
[(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
(iPTR imm)))]>,
AVX512AIi8Base, EVEX;
def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
- (ins To.MemOp:$dst, From.RC:$src1, i32u8imm:$idx),
+ (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst|$dst, $src1, $idx}",
[(store (To.VT (vextract_extract:$idx
@@ -642,7 +661,7 @@ multiclass vextract_for_size<int Opcode,
let mayStore = 1, hasSideEffects = 0 in
def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, To.KRCWM:$mask,
- From.RC:$src1, i32u8imm:$idx),
+ From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst {${mask}}|"
"$dst {${mask}}, $src1, $idx}",
@@ -846,32 +865,20 @@ def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
-
- let isCodeGenOnly = 1 in {
- def r_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins SrcInfo.FRC:$src), OpcodeStr#"\t{$src, $dst|$dst, $src}",
- [(set DestInfo.RC:$dst, (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)))]>,
- Requires<[HasAVX512]>, T8PD, EVEX;
-
- let Constraints = "$src0 = $dst" in
- def rk_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.RC:$src0, DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.RC:$src0))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_K;
-
- def rkz_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.ImmAllZerosV))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_KZ;
- } // let isCodeGenOnly = 1 in
+ def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
+ DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
+ DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
@@ -892,7 +899,6 @@ multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src))))),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#m) addr:$src)>;
- let AddedComplexity = 20 in
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
@@ -900,7 +906,6 @@ multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask, addr:$src)>;
- let AddedComplexity = 30 in
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
@@ -951,39 +956,42 @@ def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
(VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
+ SDPatternOperator OpNode,
RegisterClass SrcRC> {
+ let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins SrcRC:$src),
"vpbroadcast"##_.Suffix, "$src", "$src",
- (_.VT (X86VBroadcast SrcRC:$src))>, T8PD, EVEX;
+ (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
}
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
+ SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_int_broadcast_reg<opc, _.info512, SrcRC>, EVEX_V512;
+ defm Z : avx512_int_broadcast_reg<opc, _.info512, OpNode, SrcRC>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_int_broadcast_reg<opc, _.info256, SrcRC>, EVEX_V256;
- defm Z128 : avx512_int_broadcast_reg<opc, _.info128, SrcRC>, EVEX_V128;
+ defm Z256 : avx512_int_broadcast_reg<opc, _.info256, OpNode, SrcRC>, EVEX_V256;
+ defm Z128 : avx512_int_broadcast_reg<opc, _.info128, OpNode, SrcRC>, EVEX_V128;
}
}
let isCodeGenOnly = 1 in {
-defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR8,
- HasBWI>;
-defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR16,
- HasBWI>;
+defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
+ X86VBroadcast, GR8, HasBWI>;
+defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
+ X86VBroadcast, GR16, HasBWI>;
}
let isAsmParserOnly = 1 in {
defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
- GR32, HasBWI>;
+ null_frag, GR32, HasBWI>;
defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
- GR32, HasBWI>;
+ null_frag, GR32, HasBWI>;
}
-defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, GR32,
- HasAVX512>;
-defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, GR64,
- HasAVX512>, VEX_W;
+defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
+ X86VBroadcast, GR32, HasAVX512>;
+defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
+ X86VBroadcast, GR64, HasAVX512>, VEX_W;
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
(VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
@@ -1035,7 +1043,18 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
+let Predicates = [HasAVX512] in {
+  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZm addr:$src)>;
+}
+
let Predicates = [HasVLX, HasBWI] in {
+  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ128m addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ256m addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
@@ -1075,18 +1094,12 @@ def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
-def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
- (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v8f32 VR256X:$src), 1)>;
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4f64 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4i64 VR256X:$src), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
- (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v16i16 VR256X:$src), 1)>;
@@ -1098,46 +1111,6 @@ def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
- (VINSERTF64x4Zrr
- (VINSERTF32x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v8f64 (VINSERTF32x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v8i64 (VINSERTI32x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-
-def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v32i16 (VINSERTI32x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v64i8 (VINSERTI32x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
}
let Predicates = [HasVLX] in {
@@ -1209,25 +1182,6 @@ def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
- (VINSERTF64x4Zrr
- (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16f32 (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16i32 (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
@@ -1265,25 +1219,6 @@ def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
-
-def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
- (VINSERTF32x8Zrr
- (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16f32 (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
- (VINSERTI32x8Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16i32 (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
@@ -1310,6 +1245,13 @@ defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
avx512vl_f32_info, avx512vl_f64_info>;
+let Predicates = [HasVLX] in {
+def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
+ (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
+def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
+ (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
+}
+
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
@@ -1604,13 +1546,13 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)>, EVEX_4V;
+ let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@@ -1629,6 +1571,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
+ let mayLoad = 1 in
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
@@ -1667,8 +1610,10 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
}
let Predicates = [HasAVX512] in {
+ let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
AVX512XSIi8Base;
+ let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
AVX512XDIi8Base, VEX_W;
}
@@ -2087,22 +2032,20 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
- let AddedComplexity = 20 in {
- def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
- (ins _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set _.KRC:$dst,
- (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i32 imm:$src2)))], NoItinerary>;
- def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
- (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.KRC:$dst,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i32 imm:$src2))))], NoItinerary>, EVEX_K;
- }
+ (i32 imm:$src2)))], NoItinerary>;
+ def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
}
}
@@ -2242,28 +2185,26 @@ let Predicates = [HasBWI] in {
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
- (COPY_TO_REGCLASS GR16:$src, VK16)>;
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
- (COPY_TO_REGCLASS VK16:$src, GR16)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
- (COPY_TO_REGCLASS GR8:$src, VK8)>;
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
- (COPY_TO_REGCLASS VK8:$src, GR8)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
- (i32 (INSERT_SUBREG (IMPLICIT_DEF),
- (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>;
+ (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
- (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>;
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
- (i32 (INSERT_SUBREG (IMPLICIT_DEF),
- (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>;
+ (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
@@ -2296,20 +2237,20 @@ let Predicates = [HasDQI] in {
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store VK1:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(store VK2:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(store VK4:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(store VK8:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(v8i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
@@ -2340,44 +2281,41 @@ let Predicates = [HasBWI] in {
let Predicates = [HasAVX512] in {
def : Pat<(i1 (trunc (i64 GR64:$src))),
- (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
- (i32 1))), VK1)>;
+ (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
+ (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i32 GR32:$src))),
- (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
+ (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
(COPY_TO_REGCLASS GR32:$src, VK1)>;
def : Pat<(i1 (trunc (i8 GR8:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit), (i32 1))),
- VK1)>;
+ (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit), (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i16 GR16:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit), (i32 1))),
- VK1)>;
+ (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR16:$src, sub_16bit), (i32 1)), VK1)>;
def : Pat<(i32 (zext VK1:$src)),
- (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>;
def : Pat<(i32 (anyext VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, GR32)>;
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
- (AND32ri8 (KMOVWrk
- (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>;
def : Pat<(i8 (anyext VK1:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
def : Pat<(i64 (zext VK1:$src)),
- (AND64ri8 (SUBREG_TO_REG (i64 0),
- (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
+ (SUBREG_TO_REG (i64 0),
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>;
def : Pat<(i64 (anyext VK1:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -2385,8 +2323,7 @@ let Predicates = [HasAVX512] in {
def : Pat<(i16 (zext VK1:$src)),
(EXTRACT_SUBREG
- (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
- sub_16bit)>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>;
def : Pat<(i16 (anyext VK1:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
@@ -2440,15 +2377,6 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
-multiclass avx512_mask_unop_int<string IntName, string InstName> {
- let Predicates = [HasAVX512] in
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
- (i16 GR16:$src)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
- (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
-}
-defm : avx512_mask_unop_int<"knot", "KNOT">;
-
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
@@ -2497,21 +2425,6 @@ defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
-multiclass avx512_mask_binop_int<string IntName, string InstName> {
- let Predicates = [HasAVX512] in
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
- (i16 GR16:$src1), (i16 GR16:$src2)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
- (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
- (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
-}
-
-defm : avx512_mask_binop_int<"kand", "KAND">;
-defm : avx512_mask_binop_int<"kandn", "KANDN">;
-defm : avx512_mask_binop_int<"kor", "KOR">;
-defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
-defm : avx512_mask_binop_int<"kxor", "KXOR">;
-
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
Instruction Inst> {
// With AVX512F, 8-bit mask is promoted to 16-bit mask,
@@ -2613,8 +2526,8 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
}
}
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
@@ -2625,7 +2538,6 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
}
multiclass avx512_mask_setop_w<PatFrag Val> {
- defm B : avx512_mask_setop<VK8, v8i1, Val>;
defm W : avx512_mask_setop<VK16, v16i1, Val>;
defm D : avx512_mask_setop<VK32, v32i1, Val>;
defm Q : avx512_mask_setop<VK64, v64i1, Val>;
@@ -2642,9 +2554,11 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
- def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
- def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
- def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+ let AddedComplexity = 10 in { // To optimize isel table.
+ def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
+ def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+ def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+ }
}
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
@@ -2695,12 +2609,12 @@ def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
- def : Pat<(VT (X86vshli RC:$src, (i8 imm:$imm))),
+ def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
- def : Pat<(VT (X86vsrli RC:$src, (i8 imm:$imm))),
+ def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
@@ -2738,7 +2652,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
[(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
_.ExeDomain>, EVEX;
- let Constraints = "$src0 = $dst" in {
+ let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
@@ -3160,6 +3074,10 @@ def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src)
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))],
@@ -3272,20 +3190,22 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
(scalar_to_vector _.FRC:$src2))))],
_.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))),
_.ImmAllZerosV)))],
_.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))),
(_.VT _.RC:$src0))))],
_.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -3335,8 +3255,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk)
(COPY_TO_REGCLASS _.FRC:$src2, _.RC),
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
- (_.VT _.RC:$src0),
- (COPY_TO_REGCLASS _.FRC:$src1, _.RC)),
+ (_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
def : Pat<(_.VT (OpNode _.RC:$src0,
@@ -3346,10 +3265,8 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
(_.EltVT ZeroFP))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz)
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
- (_.VT _.RC:$src0),
- (COPY_TO_REGCLASS _.FRC:$src1, _.RC)),
+ (_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
-
}
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
@@ -3359,14 +3276,31 @@ def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (i64 0))),
- (i64 0)))),
+ (iPTR 0))),
+ (iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
+multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC,
+ SubRegIndex subreg> {
+
+def : Pat<(masked_store addr:$dst, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT _.info128.RC:$src),
+ (iPTR 0))),
+ (iPTR 0)))),
+ (!cast<Instruction>(InstrStr#mrk) addr:$dst,
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+
+}
+
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
@@ -3374,7 +3308,7 @@ def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (bitconvert
(v16i32 immAllZerosV))))),
- (i64 0))),
+ (iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
addr:$srcAddr)>;
@@ -3384,53 +3318,81 @@ def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
- (i64 0))),
- (i64 0))))),
- (i64 0))),
+ (iPTR 0))),
+ (iPTR 0))))),
+ (iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
addr:$srcAddr)>;
}
+multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC,
+ SubRegIndex subreg> {
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmkz)
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ addr:$srcAddr)>;
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)),
+ (iPTR 0))),
+ (iPTR 0))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ addr:$srcAddr)>;
+
+}
+
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
- (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
- (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
- (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
- (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
- VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
+ VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
- VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
+ VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
- (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
+ (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
let hasSideEffects = 0 in
defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info,
- (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+ (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2),
"vmovss.s", "$src2, $src1", "$src1, $src2", []>,
XS, EVEX_4V, VEX_LIG;
let hasSideEffects = 0 in
-defm VMOVSSDrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
- (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
+ (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2),
"vmovsd.s", "$src2, $src1", "$src1, $src2", []>,
XD, EVEX_4V, VEX_LIG, VEX_W;
@@ -3439,31 +3401,31 @@ let Predicates = [HasAVX512] in {
// Move scalar to XMM zero-extended, zeroing a VR128X then do a
// MOVS{S,D} to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
- (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)), FR32X:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
- (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
- (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
- (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>;
}
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4i32 (V_SET0)),
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4i32 (V_SET0)),
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
let AddedComplexity = 20 in {
@@ -3525,11 +3487,11 @@ let Predicates = [HasAVX512] in {
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
FR32X:$src)), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
+ (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
FR64X:$src)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
@@ -3538,18 +3500,18 @@ let Predicates = [HasAVX512] in {
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDZrr (v2f64 (V_SET0)),
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDZrr (v2f64 (V_SET0)),
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
@@ -3582,10 +3544,6 @@ let Predicates = [HasAVX512] in {
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
- def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
- (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
- def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
- (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
@@ -3635,6 +3593,8 @@ let Predicates = [HasAVX512] in {
}
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
@@ -3669,42 +3629,26 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
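The zextloadi64i32 pattern added earlier in this hunk folds a 32-bit load that is zero-extended to 64 bits, with the rest of the vector cleared, into a plain vmovd load. Roughly, at the source level (a sketch with an illustrative function name):

#include <stdint.h>
#include <immintrin.h>

/* A 32-bit value moved into the low lane with everything above cleared. */
__m128i load32_zext_to_low_lane(const uint32_t *p) {
    return _mm_cvtsi32_si128((int)*p);
}
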
-
-def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
-
-def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
-
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
let SchedRW = [WriteLoad] in {
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
- SSEPackedInt>, EVEX, T8PD, EVEX_V512,
+ [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
(ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR256X:$dst, (int_x86_avx2_movntdqa addr:$src))],
- SSEPackedInt>, EVEX, T8PD, EVEX_V256,
+ [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
EVEX_CD8<64, CD8VF>;
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR128X:$dst, (int_x86_sse41_movntdqa addr:$src))],
- SSEPackedInt>, EVEX, T8PD, EVEX_V128,
+ [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
EVEX_CD8<64, CD8VF>;
}
}
@@ -4150,8 +4094,7 @@ let Predicates = [HasDQI, NoVLX] in {
//===----------------------------------------------------------------------===//
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _, OpndItins itins,
- bit IsCommutable = 0> {
+ X86VectorVTInfo _, bit IsCommutable = 0> {
defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4159,7 +4102,7 @@ multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(bitconvert (_.VT _.RC:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
_.RC:$src2)))),
- itins.rr, IsCommutable>,
+ IIC_SSE_BIT_P_RR, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -4169,14 +4112,13 @@ multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(bitconvert (_.LdFrag addr:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))))),
- itins.rm>,
+ IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V;
}
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _, OpndItins itins,
- bit IsCommutable = 0> :
- avx512_logic_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
+ X86VectorVTInfo _, bit IsCommutable = 0> :
+ avx512_logic_rm<opc, OpcodeStr, OpNode, _, IsCommutable> {
defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
@@ -4189,58 +4131,48 @@ multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(bitconvert
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))))),
- itins.rm>,
+ IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTInfo, OpndItins itins,
- Predicate prd, bit IsCommutable = 0> {
- let Predicates = [prd] in
- defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ AVX512VLVectorVTInfo VTInfo,
+ bit IsCommutable = 0> {
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
- let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
- defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
- itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
+ IsCommutable>, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
- itins, prd, IsCommutable>,
- VEX_W, EVEX_CD8<64, CD8VF>;
+ IsCommutable>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
- SDNode OpNode, OpndItins itins, Predicate prd,
- bit IsCommutable = 0> {
- defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
- IsCommutable>;
-
- defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
- IsCommutable>;
+ SDNode OpNode, bit IsCommutable = 0> {
+ defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, IsCommutable>;
+ defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, IsCommutable>;
}
-defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SSE_INTALU_ITINS_P, HasAVX512, 0>;
+defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;
+defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, 1>;
+defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 1>;
+defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -4252,16 +4184,16 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 FROUND_CURRENT)),
+ (_.VT (VecNode _.RC:$src1, _.RC:$src2,
+ (i32 FROUND_CURRENT))),
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (VecNode (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
- (i32 FROUND_CURRENT)),
+ (_.VT (VecNode _.RC:$src1,
+ _.ScalarIntMemCPat:$src2,
+ (i32 FROUND_CURRENT))),
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -4291,13 +4223,43 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
EVEX_B, EVEX_RC;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode VecNode, OpndItins itins, bit IsCommutable> {
- let ExeDomain = _.ExeDomain in
+ SDNode OpNode, SDNode VecNode, SDNode SaeNode,
+ OpndItins itins, bit IsCommutable> {
+ let ExeDomain = _.ExeDomain in {
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
+ itins.rr>;
+
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (VecNode _.RC:$src1,
+ _.ScalarIntMemCPat:$src2)),
+ itins.rm>;
+
+ let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
+ def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
+ itins.rr> {
+ let isCommutable = IsCommutable;
+ }
+ def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src2)))], itins.rm>;
+ }
+
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
+ }
}
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4316,31 +4278,29 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode VecNode,
+ SDNode VecNode, SDNode SaeNode,
SizeItins itins, bit IsCommutable> {
- defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
- itins.s, IsCommutable>,
- avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
- itins.s, IsCommutable>,
+ defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
+ VecNode, SaeNode, itins.s, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
- defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
- itins.d, IsCommutable>,
- avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
- itins.d, IsCommutable>,
+ defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
+ VecNode, SaeNode, itins.d, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
-defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
-defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_MUL_ITINS_S, 1>;
-defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
-defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_DIV_ITINS_S, 0>;
-defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>;
-defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>;
+defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
+defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
+ SSE_ALU_ITINS_S, 0>;
+defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
+ SSE_ALU_ITINS_S, 0>;
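VMIN/VMAX keep IsCommutable = 0 because the underlying instructions return the second source operand when an input is a NaN (or for zeros of opposite sign), so operand order is observable. A small C illustration of that asymmetry, assuming SSE intrinsics:

#include <math.h>
#include <immintrin.h>

/* minss returns its second source when either input is NaN, so swapping
   the operands changes the result; hence IsCommutable = 0 above. */
float min_is_not_commutative(void) {
    __m128 qnan = _mm_set_ss(nanf(""));
    __m128 one  = _mm_set_ss(1.0f);
    float a = _mm_cvtss_f32(_mm_min_ss(qnan, one)); /* 1.0f (second operand) */
    float b = _mm_cvtss_f32(_mm_min_ss(one, qnan)); /* NaN  (second operand) */
    return a + b;
}
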
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
- let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
+ let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4598,6 +4558,7 @@ let Predicates = [HasVLX,HasDQI] in {
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
@@ -4613,10 +4574,12 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
EVEX_4V, EVEX_B;
+ }
}
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
@@ -4627,6 +4590,7 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1,
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
+ }
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
@@ -4899,6 +4863,33 @@ defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
+// Use the 512-bit VPSRA/VPSRAI forms to implement v2i64/v4i64 shifts when VLX is unavailable.
+let Predicates = [HasAVX512, NoVLX] in {
+ def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ VR128X:$src2)), sub_ymm)>;
+
+ def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ VR128X:$src2)), sub_xmm)>;
+
+ def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ imm:$src2)), sub_ymm)>;
+
+ def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ imm:$src2)), sub_xmm)>;
+}
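The block above widens 64-bit-element arithmetic shifts to the 512-bit VPSRAQ when VLX is unavailable. A hedged C sketch of the same widening trick, using AVX-512F intrinsics (function name illustrative; the upper lanes carry unspecified data, matching the IMPLICIT_DEF in the patterns):

#include <immintrin.h>

/* Place the 128-bit value in the low lanes of a zmm register, shift with
   the 512-bit instruction, then take the low 128 bits back out. */
__m128i sra_epi64_via_zmm(__m128i v, __m128i count) {
    __m512i wide    = _mm512_castsi128_si512(v);      /* INSERT_SUBREG ..., sub_xmm  */
    __m512i shifted = _mm512_sra_epi64(wide, count);  /* 512-bit vpsraq              */
    return _mm512_castsi512_si128(shifted);           /* EXTRACT_SUBREG ..., sub_xmm */
}
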
+
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
@@ -4932,6 +4923,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
+
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
@@ -4955,12 +4947,13 @@ multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
}
// Use the 512-bit version to implement the 128/256-bit forms when VLX is unavailable.
-multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
- let Predicates = [HasBWI, NoVLX] in {
+multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
+ SDNode OpNode, list<Predicate> p> {
+ let Predicates = p in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
(_.info256.VT _.info256.RC:$src2))),
(EXTRACT_SUBREG
- (!cast<Instruction>(NAME#"WZrr")
+ (!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
@@ -4968,13 +4961,12 @@ multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
(_.info128.VT _.info128.RC:$src2))),
(EXTRACT_SUBREG
- (!cast<Instruction>(NAME#"WZrr")
+ (!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
}
-
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
@@ -4990,19 +4982,22 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
- avx512_var_shift_w<0x12, "vpsllvw", shl>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;
+ avx512_var_shift_w<0x12, "vpsllvw", shl>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
- avx512_var_shift_w<0x11, "vpsravw", sra>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
+ avx512_var_shift_w<0x11, "vpsravw", sra>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
- avx512_var_shift_w<0x10, "vpsrlvw", srl>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
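For the variable-shift forms, both the data and the per-element counts are widened before the 512-bit instruction is used, which is what the generalized avx512_var_shift_lowering above encodes. A sketch for the vpsravw case, assuming AVX-512BW intrinsics (function name illustrative):

#include <immintrin.h>

/* Widen both operands to zmm, shift with the 512-bit instruction,
   and extract the low 256 bits of the result. */
__m256i srav_epi16_via_zmm(__m256i v, __m256i counts) {
    __m512i wv = _mm512_castsi256_si512(v);
    __m512i wc = _mm512_castsi256_si512(counts);
    return _mm512_castsi512_si256(_mm512_srav_epi16(wv, wc));
}
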
+
// Special handling for VPSRAV intrinsics.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> {
@@ -5013,7 +5008,6 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
- let AddedComplexity = 20 in {
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
@@ -5023,8 +5017,6 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
- }
- let AddedComplexity = 30 in {
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
@@ -5034,7 +5026,6 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
- }
}
}
@@ -5046,14 +5037,12 @@ multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
_.RC:$src1, addr:$src2)>;
- let AddedComplexity = 20 in
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
- let AddedComplexity = 30 in
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
@@ -5251,6 +5240,7 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
@@ -5599,7 +5589,7 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
"$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base;
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr,
+ (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5625,13 +5615,13 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
-
+ let ExeDomain = _.ExeDomain in {
defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
                          // Operands for the intrinsic are in 123 order to preserve passthru
// semantics.
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
- (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), (i32 FROUND_CURRENT))),
+ _.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
@@ -5641,8 +5631,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
(_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds3 _.RC:$src2,
- (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ (_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
_.RC:$src1, (i32 FROUND_CURRENT))),
(_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
@@ -5653,8 +5642,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds1 _.RC:$src1,
- (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ (_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
_.RC:$src2, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2,
(i32 imm:$rc))),
@@ -5662,6 +5650,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
_.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
+ }
}
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
@@ -5692,6 +5681,7 @@ defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -5711,6 +5701,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
+ }
}
} // Constraints = "$src1 = $dst"
@@ -5878,10 +5869,10 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT ,
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC;
- def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+ def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
- (SrcVT.VT (scalar_to_vector (SrcVT.ScalarLdFrag addr:$src))),
+ (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
(i32 FROUND_CURRENT)))]>,
EVEX, VEX_LIG;
} // Predicates = [HasAVX512]
@@ -5918,20 +5909,20 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
(VCVTSS2SIZrr VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvtss2si (sse_load_f32 addr:$src))),
- (VCVTSS2SIZrm addr:$src)>;
+ def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
+ (VCVTSS2SIZrm sse_load_f32:$src)>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
(VCVTSS2SI64Zrr VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 (sse_load_f32 addr:$src))),
- (VCVTSS2SI64Zrm addr:$src)>;
+ def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
+ (VCVTSS2SI64Zrm sse_load_f32:$src)>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
(VCVTSD2SIZrr VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si (sse_load_f64 addr:$src))),
- (VCVTSD2SIZrm addr:$src)>;
+ def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
+ (VCVTSD2SIZrm sse_load_f64:$src)>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
(VCVTSD2SI64Zrr VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (sse_load_f64 addr:$src))),
- (VCVTSD2SI64Zrm addr:$src)>;
+ def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
+ (VCVTSD2SI64Zrm sse_load_f64:$src)>;
} // HasAVX512
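These patterns now take the sse_load_f32 / sse_load_f64 complex operand directly, which lets a scalar load fold into the conversion. A hedged C sketch of the kind of code that benefits (intrinsic usage illustrative; whether the load actually folds is up to instruction selection):

#include <immintrin.h>

/* Convert a float loaded from memory; the load may fold into a
   memory-operand vcvtss2si. */
int cvt_loaded_float(const float *p) {
    __m128 v = _mm_load_ss(p);
    return _mm_cvtss_si32(v);
}
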
let Predicates = [HasAVX512] in {
@@ -6018,7 +6009,7 @@ let Predicates = [HasAVX512] in {
EVEX,VEX_LIG , EVEX_B;
let mayLoad = 1, hasSideEffects = 0 in
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
- (ins _SrcRC.MemOp:$src),
+ (ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[]>, EVEX, VEX_LIG;
@@ -6055,47 +6046,58 @@ defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
(VCVTTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvttss2si (sse_load_f32 addr:$src))),
- (VCVTTSS2SIZrm_Int addr:$src)>;
+ def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
+ (VCVTTSS2SIZrm_Int ssmem:$src)>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
(VCVTTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 (sse_load_f32 addr:$src))),
- (VCVTTSS2SI64Zrm_Int addr:$src)>;
+ def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
+ (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
(VCVTTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si (sse_load_f64 addr:$src))),
- (VCVTTSD2SIZrm_Int addr:$src)>;
+ def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
+ (VCVTTSD2SIZrm_Int sdmem:$src)>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
(VCVTTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (sse_load_f64 addr:$src))),
- (VCVTTSD2SI64Zrm_Int addr:$src)>;
+ def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
+ (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode> {
- defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
- defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT (scalar_to_vector
- (_Src.ScalarLdFrag addr:$src2))),
+ (_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _Src.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+ let mayLoad = 1 in
+ def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ }
}
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6107,7 +6109,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6140,39 +6142,36 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd,f32x_info, f64x_info >;
def : Pat<(f64 (fpextend FR32X:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
- (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+ (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
- (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (extloadf32 addr:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(f64 (extloadf32 addr:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+ (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fpround FR64X:$src)),
- (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
- (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
+ (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
- (VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
+ (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
- (VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
+ (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
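The rewritten fpextend/fpround patterns select the FR32X/FR64X register forms directly instead of bouncing through VR128X copies. At the source level these are just scalar conversions, e.g. (a minimal sketch):

/* Plain C scalar conversions covered by the patterns above. */
double widen(float x)   { return (double)x; }  /* f64 (fpextend ...) */
float  narrow(double x) { return (float)x;  }  /* f32 (fpround  ...) */
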
//===----------------------------------------------------------------------===//
@@ -6808,7 +6807,7 @@ let Predicates = [HasAVX512] in {
let Predicates = [HasVLX] in {
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
- defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f128mem>,
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
}
@@ -6917,7 +6916,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
- let AddedComplexity = 20 , Predicates = [HasAVX512] in {
+ let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -6942,6 +6941,7 @@ defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
@@ -6955,6 +6955,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, T8PD, EVEX_B;
+ }
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
@@ -6986,7 +6987,7 @@ defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode> {
-
+ let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -7005,6 +7006,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
+ }
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
@@ -7024,7 +7026,7 @@ defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
-
+ let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
@@ -7041,9 +7043,11 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
+ let ExeDomain = _.ExeDomain in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
@@ -7084,6 +7088,7 @@ defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
(_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
@@ -7092,6 +7097,7 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX;
@@ -7106,6 +7112,7 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, EVEX_B;
+ }
}
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
@@ -7143,7 +7150,7 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
-
+ let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -7176,6 +7183,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
}
+ }
def : Pat<(_.EltVT (OpNode _.FRC:$src)),
(!cast<Instruction>(NAME#SUFF#Zr)
@@ -7480,11 +7488,11 @@ multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
@@ -7499,11 +7507,11 @@ multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
- v16i8x_info, i32mem, LdFrag, OpNode>,
+ v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
@@ -7518,11 +7526,11 @@ multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
- v16i8x_info, i16mem, LdFrag, OpNode>,
+ v16i8x_info, i16mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
@@ -7537,11 +7545,11 @@ multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
- v8i16x_info, i64mem, LdFrag, OpNode>,
+ v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
@@ -7556,11 +7564,11 @@ multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
- v8i16x_info, i32mem, LdFrag, OpNode>,
+ v8i16x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
@@ -7575,12 +7583,12 @@ multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
- v4i32x_info, i64mem, LdFrag, OpNode>,
+ v4i32x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
@@ -7594,19 +7602,19 @@ multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
}
}
-defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
-defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
-defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
-defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
-defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
-defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
+defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">;
+defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">;
+defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">;
+defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">;
+defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">;
+defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">;
-defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
-defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
-defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
-defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
-defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
-defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
+defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">;
+defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">;
+defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">;
+defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">;
+defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">;
+defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">;
// EXTLOAD patterns, implemented using vpmovz
multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
@@ -7649,69 +7657,69 @@ let Predicates = [HasAVX512] in {
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}
-multiclass AVX512_pmovx_patterns<string OpcPrefix, string ExtTy,
- SDNode ExtOp, PatFrag ExtLoad16> {
+multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
+ SDNode InVecOp, PatFrag ExtLoad16> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
// 256-bit patterns
@@ -7790,8 +7798,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, string ExtTy,
}
}
-defm : AVX512_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
-defm : AVX512_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
+defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
+defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
@@ -7832,7 +7840,7 @@ multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
mgatherv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz512mem,
+ defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
@@ -7889,7 +7897,7 @@ multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
mscatterv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz512mem,
+ defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
@@ -7922,7 +7930,7 @@ defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7934,7 +7942,7 @@ defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7946,7 +7954,7 @@ defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7958,7 +7966,7 @@ defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7982,6 +7990,17 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
}
+// Use the 512-bit version to implement the 128/256-bit forms when VLX is unavailable.
+multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
+ X86VectorVTInfo _> {
+
+ def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
+ (X86Info.VT (EXTRACT_SUBREG
+ (_.VT (!cast<Instruction>(NAME#"Zrr")
+ (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
+ X86Info.SubRegIdx))>;
+}
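This NoVLX fallback performs the mask-to-vector conversion at 512 bits and then extracts the narrow result. A hedged C approximation, assuming AVX-512DQ intrinsics (only the low mask bits are meaningful for the narrow result; function name illustrative):

#include <immintrin.h>

/* Convert a mask to a full zmm of sign-extended elements, then take
   the low 128 bits. */
__m128i mask_to_vec_epi32(__mmask16 k) {
    __m512i full = _mm512_movm_epi32(k);
    return _mm512_castsi512_si128(full);
}
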
+
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
@@ -7991,20 +8010,17 @@ let Predicates = [prd] in
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
-}
+let Predicates = [prd, NoVLX] in {
+ defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
+ defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
+ }
-multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
- defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
- HasBWI>;
- defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
- HasBWI>, VEX_W;
- defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
- HasDQI>;
- defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
- HasDQI>, VEX_W;
}
-defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
+defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info,  "vpmovm2", HasBWI>;
+defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
+defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
+defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
@@ -8319,6 +8335,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
@@ -8466,6 +8483,39 @@ defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+let Predicates = [HasAVX512] in {
+// Provide a fallback in case the load node used in the broadcast patterns
+// above has additional users, which prevents those patterns from being
+// selected.
+def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+ (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+ (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+
+def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
+ (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
+ (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+
+def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
+ (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+
+def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
+ (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+}
+
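These patterns work because VSHUFF64X2/VSHUFI32X4 with an all-zero immediate pick 128-bit lane 0 of the corresponding source for every result lane, and both sources here carry the subvector in lane 0. A small sketch of that selection, assuming the usual 2-bits-per-result-lane immediate layout (shuf64x2/subVBroadcast are illustrative names, not part of the patch):

#include <array>
#include <cstdint>

using Lane = std::array<uint64_t, 2>;   // one 128-bit lane
using Vec512 = std::array<Lane, 4>;     // four 128-bit lanes

// Result lanes 0-1 come from Src1, lanes 2-3 from Src2, each chosen by a
// 2-bit field of the immediate.
Vec512 shuf64x2(const Vec512 &Src1, const Vec512 &Src2, uint8_t Imm) {
  return {Src1[(Imm >> 0) & 3], Src1[(Imm >> 2) & 3],
          Src2[(Imm >> 4) & 3], Src2[(Imm >> 6) & 3]};
}

Vec512 subVBroadcast(const Lane &L) {
  Vec512 Tmp = {L, Lane{}, Lane{}, Lane{}};  // INSERT_SUBREG into lane 0
  return shuf64x2(Tmp, Tmp, /*Imm=*/0);      // every result lane reads lane 0
}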
multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
AVX512AIi8Base, EVEX_4V;
@@ -8503,6 +8553,7 @@ defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
@@ -8513,6 +8564,7 @@ multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1", "$src1",
(_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
+ }
}
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -8577,66 +8629,7 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
HasBWI>;
}
-defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
-
-def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128X:$src))>;
-def avx512_v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>;
-def avx512_v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>;
-def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256X:$src))>;
-def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>;
-def avx512_v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>;
-
-let Predicates = [HasBWI, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))),
- (VPABSBZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v2i64 (avx512_v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))),
- (VPABSWZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))),
- (VPABSBZ256rr VR256X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))),
- (VPABSWZ256rr VR256X:$src)>;
-}
-let Predicates = [HasAVX512, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))),
- (VPABSDZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))),
- (VPABSDZ256rr VR256X:$src)>;
-}
-
-let Predicates = [HasAVX512] in {
-def : Pat<(xor
- (bc_v8i64 (v16i1sextv16i32)),
- (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v8i1sextv8i64)),
- (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
- (VPABSQZrr VR512:$src)>;
-}
-let Predicates = [HasBWI] in {
-def : Pat<(xor
- (bc_v8i64 (v64i1sextv64i8)),
- (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))),
- (VPABSBZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v32i1sextv32i16)),
- (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))),
- (VPABSWZrr VR512:$src)>;
-}
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
@@ -8663,6 +8656,7 @@ defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
@@ -8671,6 +8665,7 @@ multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode (_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src)))))>,
EVEX, EVEX_CD8<_.EltSize, CD8VH>;
+ }
}
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -8947,6 +8942,68 @@ multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
HasBWI>, EVEX_4V;
+// Transforms that swizzle a VPTERNLOG immediate so the instruction can still
+// be matched when the memory operand is not in its expected position.
+def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
+ uint8_t Imm = N->getZExtValue();
+ // Swap bits 1/4 and 3/6.
+ uint8_t NewImm = Imm & 0xa5;
+ if (Imm & 0x02) NewImm |= 0x10;
+ if (Imm & 0x10) NewImm |= 0x02;
+ if (Imm & 0x08) NewImm |= 0x40;
+ if (Imm & 0x40) NewImm |= 0x08;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
+ uint8_t Imm = N->getZExtValue();
+ // Swap bits 2/4 and 3/5.
+ uint8_t NewImm = Imm & 0xc3;
+ if (Imm & 0x04) NewImm |= 0x10;
+ if (Imm & 0x10) NewImm |= 0x04;
+ if (Imm & 0x08) NewImm |= 0x20;
+ if (Imm & 0x20) NewImm |= 0x08;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
+ uint8_t Imm = N->getZExtValue();
+ // Swap bits 1/2 and 5/6.
+ uint8_t NewImm = Imm & 0x99;
+ if (Imm & 0x02) NewImm |= 0x04;
+ if (Imm & 0x04) NewImm |= 0x02;
+ if (Imm & 0x20) NewImm |= 0x40;
+ if (Imm & 0x40) NewImm |= 0x20;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by moving operand 0 to the end.
+ uint8_t Imm = N->getZExtValue();
+ // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
+ uint8_t NewImm = Imm & 0x81;
+ if (Imm & 0x02) NewImm |= 0x04;
+ if (Imm & 0x04) NewImm |= 0x10;
+ if (Imm & 0x08) NewImm |= 0x40;
+ if (Imm & 0x10) NewImm |= 0x02;
+ if (Imm & 0x20) NewImm |= 0x08;
+ if (Imm & 0x40) NewImm |= 0x20;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
+ uint8_t Imm = N->getZExtValue();
+ // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
+ uint8_t NewImm = Imm & 0x81;
+ if (Imm & 0x02) NewImm |= 0x10;
+ if (Imm & 0x04) NewImm |= 0x02;
+ if (Imm & 0x08) NewImm |= 0x20;
+ if (Imm & 0x10) NewImm |= 0x04;
+ if (Imm & 0x20) NewImm |= 0x40;
+ if (Imm & 0x40) NewImm |= 0x08;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
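Each transform above is just a permutation of the 8-entry truth table that mirrors the operand permutation, assuming vpternlog's usual indexing (result bit = Imm[4*op0 + 2*op1 + op2]). A standalone sanity check for the 321 variant, using hypothetical helper names (lut, swizzle321) and not part of the patch; the other four transforms can be checked the same way against their operand permutations:

#include <cassert>
#include <cstdint>

// Truth-table lookup: the result bit for input bits (A, B, C).
static unsigned lut(uint8_t Imm, unsigned A, unsigned B, unsigned C) {
  return (Imm >> (4 * A + 2 * B + C)) & 1;
}

// Same bit moves as VPTERNLOG321_imm8: keep 0xa5, swap bits 1/4 and 3/6.
static uint8_t swizzle321(uint8_t Imm) {
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return NewImm;
}

int main() {
  for (unsigned Imm = 0; Imm < 256; ++Imm)
    for (unsigned A = 0; A < 2; ++A)
      for (unsigned B = 0; B < 2; ++B)
        for (unsigned C = 0; C < 2; ++C)
          // Evaluating the swizzled immediate with operands 0 and 2 swapped
          // must reproduce the original result.
          assert(lut(swizzle321(uint8_t(Imm)), C, B, A) == lut(uint8_t(Imm), A, B, C));
  return 0;
}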
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
@@ -8975,6 +9032,141 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}// Constraints = "$src1 = $dst"
+
+ // Additional patterns for matching passthru operand in other positions.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+
+ // Additional patterns for matching loads in other positions.
+ def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (OpNode _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching zero masking with loads in other
+ // positions.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching masked loads with different
+ // operand orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+
+ // Additional patterns for matching broadcasts in other positions.
+ def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (OpNode _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching zero masking with broadcasts in other
+ // positions.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching masked broadcasts with different
+ // operand orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ (i8 imm:$src4)), _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index ba970bc2048e..dcce7b9951f2 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -147,7 +147,7 @@ addOffset(const MachineInstrBuilder &MIB, int Offset) {
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, const MachineOperand& Offset) {
- return MIB.addImm(1).addReg(0).addOperand(Offset).addReg(0);
+ return MIB.addImm(1).addReg(0).add(Offset).addReg(0);
}
/// addRegOffset - This function is used to add a memory reference of the form
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index c73c95019f8d..b85abfb9ca7f 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -110,3 +110,9 @@ defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+// SALC is an undocumented instruction. Information about it can be found at
+// http://www.rcollins.org/secrets/opcodes/SALC.html
+// Set AL if carry.
+let Uses = [EFLAGS], Defs = [AL] in {
+ def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>;
+}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 3c27eb8077d0..e592c2b3c0aa 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -259,20 +259,20 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1, AddedComplexity = 20 in
+ isPseudo = 1, AddedComplexity = 10 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Other widths can also make use of the 32-bit xor, which may have a smaller
// encoding and avoid partial register updates.
+let AddedComplexity = 10 in {
def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
-def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
- let AddedComplexity = 20;
+def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
}
let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
- AddedComplexity = 15 in {
+ AddedComplexity = 10 in {
// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
// which only require 3 bytes compared to MOV32ri which requires 5.
let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
@@ -287,7 +287,7 @@ let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
}
-let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 5 in {
// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
// FIXME: Add itinerary class and Schedule.
def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
@@ -772,11 +772,11 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
// the pseudo. The argument feeding EBX is ebx_input.
//
// The additional argument, $ebx_save, is a temporary register used to
-// save the value of RBX accross the actual instruction.
+// save the value of RBX across the actual instruction.
//
// To make sure the register assigned to $ebx_save does not interfere with
// the definition of the actual instruction, we use a definition $dst which
-// is tied to $rbx_save. That way, the live-range of $rbx_save spans accross
+// is tied to $rbx_save. That way, the live-range of $rbx_save spans across
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
@@ -1743,6 +1743,12 @@ def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
+// sub reg, relocImm
+def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),
+ (SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
+def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (SUB64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
// mul reg, reg
def : Pat<(mul GR16:$src1, GR16:$src2),
(IMUL16rr GR16:$src1, GR16:$src2)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 2f260c48df47..4ea223e82be9 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -264,6 +264,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
}
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [ESP, EFLAGS] in {
+ def TCRETURNdicc : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
+
+ // This gets replaced by a conditional jump instruction during MC lowering.
+ def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
+
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -325,3 +340,19 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
}
}
+
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [RSP, EFLAGS] in {
+ def TCRETURNdi64cc : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset,
+ i32imm:$cond), []>;
+
+ // This gets replaced by a conditional jump instruction during MC lowering.
+ def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 4b19f801dae1..1941ae57f0f1 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -191,13 +191,15 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
- X86MemOperand x86memop> {
- defm NAME#132#Suff : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
- x86memop, RC>;
- defm NAME#213#Suff : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
- x86memop, RC, OpNode>;
- defm NAME#231#Suff : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
- x86memop, RC>;
+ X86MemOperand x86memop> {
+ let Predicates = [HasFMA, NoAVX512] in {
+ defm NAME#132#Suff : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
+ x86memop, RC>;
+ defm NAME#213#Suff : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
+ x86memop, RC, OpNode>;
+ defm NAME#231#Suff : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
+ x86memop, RC>;
+ }
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
diff --git a/lib/Target/X86/X86InstrFMA3Info.cpp b/lib/Target/X86/X86InstrFMA3Info.cpp
index db83497ee69d..00ef65cdb6bd 100644
--- a/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -16,11 +16,14 @@
#include "X86InstrInfo.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
/// This flag is used in the method llvm::call_once() used below to make the
/// initialization of the map 'OpcodeToGroup' thread safe.
-LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag);
+static llvm::once_flag InitGroupsOnceFlag;
static ManagedStatic<X86InstrFMA3Info> X86InstrFMA3InfoObj;
X86InstrFMA3Info *X86InstrFMA3Info::getX86InstrFMA3Info() {
diff --git a/lib/Target/X86/X86InstrFMA3Info.h b/lib/Target/X86/X86InstrFMA3Info.h
index 025cee3b2b90..e3568160da46 100644
--- a/lib/Target/X86/X86InstrFMA3Info.h
+++ b/lib/Target/X86/X86InstrFMA3Info.h
@@ -1,4 +1,4 @@
-//===-- X86InstrFMA3Info.h - X86 FMA3 Instruction Information -------------===//
+//===- X86InstrFMA3Info.h - X86 FMA3 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,11 @@
#include "X86.h"
#include "llvm/ADT/DenseMap.h"
#include <cassert>
+#include <cstdint>
#include <set>
namespace llvm {
+
/// This class is used to group {132, 213, 231} forms of FMA opcodes together.
/// Each of the groups has either 3 register opcodes, 3 memory opcodes,
/// or 6 register and memory opcodes. Also, each group has an attributes field
@@ -201,7 +203,7 @@ public:
static X86InstrFMA3Info *getX86InstrFMA3Info();
/// Constructor. Just creates an object of the class.
- X86InstrFMA3Info() {}
+ X86InstrFMA3Info() = default;
/// Destructor. Deallocates the memory used for FMA3 Groups.
~X86InstrFMA3Info() {
@@ -310,6 +312,7 @@ public:
return rm_iterator(getX86InstrFMA3Info()->OpcodeToGroup.end());
}
};
-} // namespace llvm
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 10f3839ea8ed..11b1d070ef2f 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -631,6 +631,9 @@ let Defs = [FPSW] in
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", [], IIC_FNINIT>;
def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg),
"ffree\t$reg", IIC_FFREE>;
+def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg),
+ "ffreep\t$reg", IIC_FFREE>;
+
// Clear exceptions
let Defs = [FPSW] in
@@ -665,15 +668,16 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
let Predicates = [HasFXSR] in {
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
- IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
+ "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
+ IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
+ "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>,
+ TB;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
- IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
+ "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
+ IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
} // Predicates = [FeatureFXSR]
} // SchedRW
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 610756aa37da..c2fe786732dc 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -199,7 +199,8 @@ class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
-class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_W { bits<2> VEX_WPrefix = 1; }
+class VEX_WIG { bits<2> VEX_WPrefix = 2; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
@@ -270,7 +271,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
- bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bits<2> VEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
@@ -317,7 +318,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{28-27} = ExeDomain.Value;
let TSFlags{30-29} = OpEncBits;
let TSFlags{38-31} = Opcode;
- let TSFlags{39} = hasVEX_WPrefix;
+ // Currently no need for a second bit in TSFlags: W-ignore is equivalent to W=0.
+ let TSFlags{39} = VEX_WPrefix{0};
let TSFlags{40} = hasVEX_4V;
let TSFlags{41} = hasVEX_L;
let TSFlags{42} = hasEVEX_K;
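The two-bit field distinguishes W=0 (0), W=1 (1) and W-ignore (2); keeping only bit 0 in TSFlags is lossless for encoding because a W-ignore instruction may legally be emitted with W=0. A tiny illustration of that mapping (enum and helper names are hypothetical, not part of the patch):

#include <cstdint>

enum VEXWPrefix : uint8_t { W0 = 0, W1 = 1, WIG = 2 };

// Dropping the high bit, as TSFlags{39} does, sends WIG to the same W bit as
// W0 -- a valid encoding for any W-ignore instruction.
uint8_t emittedWBit(VEXWPrefix P) { return uint8_t(P) & 1; }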
@@ -453,7 +455,7 @@ class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index c5689d7c698c..9867ba84bb9b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -27,21 +27,19 @@ def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
//===----------------------------------------------------------------------===//
def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
-def load_mvmmx : PatFrag<(ops node:$ptr),
- (x86mmx (MMX_X86movw2d (load node:$ptr)))>;
//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>,
- SDTCisFP<1>, SDTCisVT<3, i8>,
- SDTCisVec<1>]>;
-def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
- SDTCisSameAs<1, 2>, SDTCisInt<3>]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fmins : SDNode<"X86ISD::FMINS", SDTFPBinOp>;
+def X86fmaxs : SDNode<"X86ISD::FMAXS", SDTFPBinOp>;
// Commutative and Associative FMIN and FMAX.
def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
@@ -200,6 +198,15 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+def X86kshiftl : SDNode<"X86ISD::KSHIFTL",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i8>]>>;
+def X86kshiftr : SDNode<"X86ISD::KSHIFTR",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i8>]>>;
+
def X86vrotli : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>;
def X86vrotri : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>;
@@ -230,10 +237,11 @@ def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
SDTCisSameAs<0,2>,
SDTCisSameSizeAs<0,3>,
SDTCisSameNumEltsAs<0, 3>,
+ SDTCisFP<0>, SDTCisInt<3>,
SDTCisVT<4, i8>]>>;
def X86vpperm : SDNode<"X86ISD::VPPERM",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
+ SDTCisSameAs<0,2>, SDTCisSameAs<0, 3>]>>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
@@ -300,13 +308,17 @@ def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameSizeAs<0,2>,
- SDTCisSameNumEltsAs<0,2>]>;
+ SDTCisSameNumEltsAs<0,2>,
+ SDTCisFP<0>, SDTCisInt<2>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
-def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisInt<3>,
@@ -314,8 +326,10 @@ def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,
SDTCisSameNumEltsAs<0, 3>,
SDTCisVT<4, i32>,
SDTCisVT<5, i32>]>;
-def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
@@ -324,9 +338,9 @@ def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
-def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisSameAs<0,3>,
- SDTCisVT<4, i8>]>;
+def SDTTernlog : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>, SDTCisVT<4, i8>]>;
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisVT<3, i32>]>;
@@ -334,16 +348,13 @@ def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc.
SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisVT<2, i32>]>;
-def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
- SDTCisVT<4, i32>]>;
+ SDTCisFP<0>, SDTCisVT<4, i32>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
-def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
@@ -367,17 +378,28 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisVec<1>, SDTCisInt<1>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>]>;
+ SDTCisSameAs<1,2>,
+ SDTCisOpSmallerThanOp<0, 1>]>;
def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>;
-def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack, [SDNPCommutative]>;
+def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, i8>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
+def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i32>,
+ SDTCVecEltisVT<1, i16>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>,
+ [SDNPCommutative]>;
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
@@ -414,8 +436,8 @@ def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>;
def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
- SDTCisVec<1>, SDTCisFP<1>,
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisFP<1>,
SDTCisSameNumEltsAs<0,1>,
SDTCisVT<2, i32>]>, []>;
def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
@@ -428,9 +450,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
-def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisEltOfVec<2, 1>,
- SDTCisPtrTy<3>]>, []>;
def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
[SDTCisEltOfVec<0, 1>, SDTCisVec<1>,
SDTCisPtrTy<2>]>, []>;
@@ -440,24 +459,30 @@ def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
+def X86faddRnds : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>;
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
+def X86fsubRnds : SDNode<"X86ISD::FSUBS_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
+def X86fmulRnds : SDNode<"X86ISD::FMULS_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
-def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86fdivRnds : SDNode<"X86ISD::FDIVS_RND", SDTFPBinOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86fmaxRnds : SDNode<"X86ISD::FMAXS_RND", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+def X86fminRnds : SDNode<"X86ISD::FMINS_RND", SDTFPBinOpRound>;
def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>;
-def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>;
def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
def X86fgetexpRnds : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>;
-def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
-def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
-def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
-def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
-def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
-def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFPTernaryOp>;
+def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp>;
def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>;
def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>;
@@ -478,8 +503,10 @@ def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound>;
def X86FmsubRnds3 : SDNode<"X86ISD::FMSUBS3_RND", SDTFmaRound>;
def X86FnmsubRnds3 : SDNode<"X86ISD::FNMSUBS3_RND", SDTFmaRound>;
-def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTFma>;
-def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTFma>;
+def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma>;
+def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma>;
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>;
def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 627b6120b048..7b456fd68343 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -414,17 +414,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
{ X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
{ X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
- { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
{ X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
- { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
{ X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+ { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE },
+ { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
+ { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE },
+ { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE },
{ X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE },
{ X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE },
{ X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE },
@@ -867,11 +872,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
- { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
+ { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
+ { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
{ X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
{ X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
@@ -883,8 +887,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
{ X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
{ X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
+ { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
+ { X86::VPABSBZrr, X86::VPABSBZrm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
+ { X86::VPABSWZrr, X86::VPABSWZrm, 0 },
{ X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
{ X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
@@ -904,12 +911,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
{ X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
{ X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
+ { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 },
+ { X86::VPSLLDZri, X86::VPSLLDZmi, 0 },
+ { X86::VPSLLQZri, X86::VPSLLQZmi, 0 },
+ { X86::VPSLLWZri, X86::VPSLLWZmi, 0 },
+ { X86::VPSRADZri, X86::VPSRADZmi, 0 },
+ { X86::VPSRAQZri, X86::VPSRAQZmi, 0 },
+ { X86::VPSRAWZri, X86::VPSRAWZmi, 0 },
+ { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 },
+ { X86::VPSRLDZri, X86::VPSRLDZmi, 0 },
+ { X86::VPSRLQZri, X86::VPSRLQZmi, 0 },
+ { X86::VPSRLWZri, X86::VPSRLWZmi, 0 },
// AVX-512 foldable instructions (256-bit versions)
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
@@ -920,6 +936,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
{ X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
{ X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
+ { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 },
+ { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
+ { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
+ { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
{ X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
{ X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
{ X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
@@ -939,10 +959,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
{ X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
{ X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
+ { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 },
+ { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 },
+ { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 },
+ { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 },
+ { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 },
+ { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 },
+ { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 },
+ { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 },
+ { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 },
+ { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 },
+ { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 },
// AVX-512 foldable instructions (128-bit versions)
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
@@ -953,6 +983,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
{ X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
{ X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
+ { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 },
+ { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
+ { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
+ { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
{ X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
{ X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
{ X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
@@ -970,6 +1004,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
{ X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
{ X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
+ { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 },
+ { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 },
+ { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 },
+ { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 },
+ { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 },
+ { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 },
+ { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 },
+ { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 },
+ { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 },
+ { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 },
+ { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 },
// F16C foldable instructions
{ X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
@@ -1170,18 +1215,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PINSRWrri, X86::PINSRWrmi, 0 },
{ X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
+ { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
+ { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
- { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
- { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
+ { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
{ X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
+ { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
+ { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
{ X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
{ X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
- { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
- { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
- { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
- { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
{ X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
@@ -1340,8 +1385,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PMULHRWrr, X86::PMULHRWrm, 0 },
// AVX 128-bit versions of foldable instructions
- { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
- { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, TB_NO_REVERSE },
{ X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
{ X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
{ X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
@@ -1350,8 +1393,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
{ X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
- { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
- { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_NO_REVERSE },
{ X86::VADDPDrr, X86::VADDPDrm, 0 },
{ X86::VADDPSrr, X86::VADDPSrm, 0 },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
@@ -1458,18 +1499,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
{ X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
+ { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
+ { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
{ X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
{ X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
- { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
- { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
+ { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
+ { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
{ X86::VPMINSBrr, X86::VPMINSBrm, 0 },
{ X86::VPMINSDrr, X86::VPMINSDrm, 0 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
{ X86::VPMINUDrr, X86::VPMINUDrm, 0 },
{ X86::VPMINUWrr, X86::VPMINUWrm, 0 },
- { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
- { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
- { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
- { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
{ X86::VPMULDQrr, X86::VPMULDQrm, 0 },
{ X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 },
{ X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
@@ -1626,18 +1667,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
{ X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
{ X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
+ { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
+ { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
{ X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
{ X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
+ { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
+ { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
{ X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
{ X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
{ X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
{ X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
- { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
- { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
- { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
- { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
{ X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
{ X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
{ X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 },
@@ -1732,7 +1773,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
- { X86::VPCMOVrrrY, X86::VPCMOVrmrY, 0 },
+ { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 },
{ X86::VPCOMBri, X86::VPCOMBmi, 0 },
{ X86::VPCOMDri, X86::VPCOMDmi, 0 },
{ X86::VPCOMQri, X86::VPCOMQmi, 0 },
@@ -1742,9 +1783,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
{ X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
{ X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
- { X86::VPERMIL2PDrrY, X86::VPERMIL2PDmrY, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 },
{ X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
- { X86::VPERMIL2PSrrY, X86::VPERMIL2PSmrY, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 },
{ X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
{ X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
{ X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
@@ -1800,8 +1841,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
{ X86::VANDPDZrr, X86::VANDPDZrm, 0 },
{ X86::VANDPSZrr, X86::VANDPSZrm, 0 },
- { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
{ X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
{ X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
{ X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
@@ -1842,6 +1881,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
{ X86::VMINSSZrr, X86::VMINSSZrm, 0 },
{ X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
+ { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE },
{ X86::VMULPDZrr, X86::VMULPDZrm, 0 },
{ X86::VMULPSZrr, X86::VMULPSZrm, 0 },
{ X86::VMULSDZrr, X86::VMULSDZrm, 0 },
@@ -1850,6 +1890,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
{ X86::VORPDZrr, X86::VORPDZrm, 0 },
{ X86::VORPSZrr, X86::VORPSZrm, 0 },
+ { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 },
+ { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 },
+ { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 },
+ { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 },
{ X86::VPADDBZrr, X86::VPADDBZrm, 0 },
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
@@ -1863,6 +1907,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
{ X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
{ X86::VPANDQZrr, X86::VPANDQZrm, 0 },
+ { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 },
+ { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 },
{ X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
{ X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
{ X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
@@ -1887,26 +1933,55 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
{ X86::VPERMQZrr, X86::VPERMQZrm, 0 },
{ X86::VPERMWZrr, X86::VPERMWZrm, 0 },
+ { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 },
+ { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 },
+ { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 },
+ { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 },
{ X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
{ X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
+ { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 },
{ X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
{ X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
+ { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 },
+ { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 },
{ X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
{ X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
+ { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 },
+ { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 },
{ X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
{ X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
+ { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 },
+ { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 },
{ X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
{ X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
+ { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 },
{ X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
+ { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 },
+ { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 },
+ { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 },
{ X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
{ X86::VPORDZrr, X86::VPORDZrm, 0 },
{ X86::VPORQZrr, X86::VPORQZrm, 0 },
+ { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 },
{ X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
+ { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 },
+ { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 },
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
+ { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 },
+ { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 },
+ { X86::VPSRADZrr, X86::VPSRADZrm, 0 },
+ { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 },
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
+ { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 },
+ { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 },
+ { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 },
+ { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 },
+ { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 },
{ X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
{ X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+ { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 },
+ { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 },
{ X86::VPSUBBZrr, X86::VPSUBBZrm, 0 },
{ X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
{ X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
@@ -1957,9 +2032,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
{ X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
{ X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
- { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
{ X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
{ X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
@@ -1996,6 +2068,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
{ X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
{ X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
+ { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 },
+ { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 },
+ { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 },
+ { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 },
+ { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 },
+ { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 },
+ { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 },
+ { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 },
{ X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
{ X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
{ X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
@@ -2022,6 +2102,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
{ X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
{ X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
+ { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 },
+ { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 },
+ { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 },
+ { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 },
{ X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
{ X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
{ X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
@@ -2070,12 +2154,92 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
{ X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
{ X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
+ { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 },
+ { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 },
+ { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 },
+ { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 },
+ { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 },
+ { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 },
+ { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 },
+ { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 },
+ { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 },
+ { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 },
+ { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 },
+ { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 },
+ { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 },
+ { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 },
+ { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 },
+ { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 },
+ { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 },
+ { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 },
+ { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 },
+ { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 },
+ { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 },
+ { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 },
+ { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 },
+ { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 },
+ { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 },
+ { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 },
+ { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 },
+ { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 },
+ { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 },
+ { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 },
+ { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 },
+ { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 },
+ { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 },
+ { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 },
+ { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 },
+ { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 },
+ { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 },
+ { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 },
+ { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 },
+ { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 },
+ { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 },
+ { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 },
{ X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
{ X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
{ X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
{ X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
+ { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 },
+ { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 },
{ X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
{ X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
+ { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 },
+ { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 },
+ { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 },
+ { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 },
+ { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 },
+ { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 },
+ { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 },
+ { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 },
+ { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 },
+ { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 },
+ { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 },
+ { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 },
+ { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 },
+ { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 },
+ { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 },
+ { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 },
+ { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 },
+ { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 },
+ { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 },
+ { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 },
+ { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 },
+ { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 },
+ { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 },
+ { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 },
+ { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 },
+ { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 },
+ { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 },
+ { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 },
+ { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 },
+ { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 },
+ { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 },
+ { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 },
+ { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 },
+ { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 },
+ { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 },
+ { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 },
{ X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 },
{ X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 },
{ X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 },
@@ -2112,6 +2276,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
{ X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
{ X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
+ { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 },
+ { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 },
+ { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 },
+ { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 },
{ X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 },
{ X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
{ X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
@@ -2130,6 +2298,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
// AVX-512 masked foldable instructions
+ { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
+ { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
+ { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
+ { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
+ { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
{ X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
{ X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
{ X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
@@ -2149,8 +2323,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
{ X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
{ X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
+ { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 },
+ { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 },
+ { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 },
+ { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 },
+ { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 },
+ { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 },
+ { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 },
+ { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 },
+ { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 },
// AVX-512VL 256-bit masked foldable instructions
+ { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
+ { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
+ { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
+ { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
+ { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
{ X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
{ X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
{ X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
@@ -2170,8 +2359,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
{ X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
{ X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
+ { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 },
+ { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 },
+ { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 },
+ { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 },
+ { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 },
+ { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 },
+ { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 },
+ { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 },
+ { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 },
// AVX-512VL 128-bit masked foldable instructions
+ { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
+ { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
+ { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
+ { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
{ X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
{ X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
{ X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
@@ -2189,6 +2392,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
{ X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
{ X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
+ { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 },
+ { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 },
+ { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 },
+ { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 },
+ { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 },
+ { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 },
+ { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 },
+ { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 },
+ { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
@@ -2262,23 +2474,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
- { X86::VPCMOVrrrY, X86::VPCMOVrrmY, 0 },
+ { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
{ X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
- { X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 },
{ X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
- { X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 },
{ X86::VPPERMrrr, X86::VPPERMrrm, 0 },
// AVX-512 instructions with 3 source operands.
- { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
- { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
- { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
- { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
- { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
{ X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
{ X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
@@ -2329,6 +2532,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 masked instructions
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
{ X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
{ X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
{ X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
@@ -2337,6 +2542,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
{ X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
{ X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
@@ -2349,14 +2556,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
{ X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 },
+ { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 },
{ X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
{ X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
{ X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
{ X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
+ { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 },
+ { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 },
{ X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
+ { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
{ X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
+ { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 },
+ { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 },
+ { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 },
+ { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 },
{ X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
{ X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
{ X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
@@ -2370,6 +2587,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
{ X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
{ X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
+ { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 },
+ { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 },
{ X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
{ X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
{ X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
@@ -2380,9 +2599,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
{ X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
{ X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
+ { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 },
+ { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 },
+ { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 },
+ { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 },
+ { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 },
+ { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 },
+ { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 },
+ { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 },
+ { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 },
+ { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 },
+ { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 },
+ { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 },
+ { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 },
+ { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 },
+ { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 },
+ { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 },
+ { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 },
+ { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 },
+ { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 },
+ { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 },
+ { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 },
{ X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
{ X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
{ X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
+ { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
+ { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
+ { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 },
+ { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 },
+ { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 },
+ { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 },
+ { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 },
+ { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 },
+ { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 },
+ { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 },
+ { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 },
+ { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 },
+ { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 },
+ { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 },
+ { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 },
+ { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 },
+ { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 },
+ { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 },
{ X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
{ X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
{ X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
@@ -2401,8 +2659,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
{ X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
{ X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
+ { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 },
+ { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
{ X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
+ { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
{ X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
{ X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
@@ -2437,6 +2699,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
{ X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
{ X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
+ { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 },
+ { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 },
+ { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 },
+ { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 },
{ X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
{ X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
{ X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
@@ -2450,6 +2716,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
{ X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
{ X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
+ { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 },
+ { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 },
{ X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
{ X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
{ X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
@@ -2460,9 +2728,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
{ X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
{ X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
+ { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 },
+ { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 },
+ { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 },
+ { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 },
+ { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 },
+ { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 },
+ { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 },
+ { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 },
+ { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 },
+ { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 },
+ { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 },
+ { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 },
+ { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 },
+ { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 },
+ { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 },
+ { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 },
+ { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 },
+ { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 },
+ { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 },
+ { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 },
+ { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 },
{ X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
{ X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
{ X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
+ { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
+ { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
+ { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 },
+ { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 },
+ { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 },
+ { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 },
+ { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 },
+ { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 },
+ { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 },
+ { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 },
+ { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 },
+ { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 },
+ { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 },
+ { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 },
+ { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 },
+ { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 },
+ { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 },
+ { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 },
{ X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
{ X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
{ X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
@@ -2481,6 +2788,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
{ X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
{ X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
+ { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 },
+ { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 },
{ X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
{ X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
{ X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
@@ -2513,6 +2822,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
{ X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
{ X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
+ { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 },
+ { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 },
+ { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 },
+ { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 },
{ X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
{ X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
{ X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
@@ -2526,15 +2839,56 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
{ X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
{ X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
+ { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 },
+ { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 },
{ X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
{ X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
{ X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
{ X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
{ X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
{ X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
+ { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 },
+ { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 },
+ { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 },
+ { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 },
+ { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 },
+ { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 },
+ { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 },
+ { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 },
+ { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 },
+ { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 },
+ { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 },
+ { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 },
+ { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 },
+ { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 },
+ { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 },
+ { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 },
+ { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 },
+ { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 },
+ { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 },
+ { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 },
+ { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 },
{ X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
{ X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
{ X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
+ { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
+ { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
+ { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 },
+ { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 },
+ { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 },
+ { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 },
+ { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 },
+ { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 },
+ { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 },
+ { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 },
+ { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 },
+ { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 },
+ { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 },
+ { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 },
+ { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 },
+ { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 },
+ { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 },
+ { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 },
{ X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
{ X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
{ X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
@@ -2553,6 +2907,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
{ X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
{ X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
+ { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 },
+ { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 },
{ X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
{ X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
{ X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
@@ -2563,6 +2919,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
// AVX-512 masked foldable instructions
+ { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
+ { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 },
+ { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
+ { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
+ { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
{ X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
{ X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
{ X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
@@ -2582,8 +2944,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
{ X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
{ X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
+ { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 },
+ { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 },
+ { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 },
+ { X86::VPSRADZrik, X86::VPSRADZmik, 0 },
+ { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 },
+ { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 },
+ { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 },
+ { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 },
+ { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 },
// AVX-512VL 256-bit masked foldable instructions
+ { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
+ { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 },
+ { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
+ { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
+ { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
{ X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
{ X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
{ X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
@@ -2603,8 +2980,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
{ X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
{ X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
+ { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 },
+ { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 },
+ { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 },
+ { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 },
+ { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 },
+ { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 },
+ { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 },
+ { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 },
+ { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 },
// AVX-512VL 128-bit masked foldable instructions
+ { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
+ { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 },
+ { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
+ { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
+ { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
{ X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
{ X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
{ X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
@@ -2622,6 +3013,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
{ X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
{ X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
+ { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 },
+ { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 },
+ { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 },
+ { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 },
+ { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 },
+ { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 },
+ { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
+ { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
+ { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
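The entries above all follow one pattern: a register-form opcode, its memory-form twin, and a flags word (0, TB_NO_REVERSE, TB_ALIGN_16, ...). As a rough illustration of how such a table is consumed when folding a reload into an instruction, here is a minimal sketch; the FoldEntry and lookupFold names are invented for the example and are not the in-tree X86InstrInfo interface.

    // Minimal sketch (illustrative only): map a register-form opcode to its
    // memory-form counterpart plus folding flags.
    #include <cstdint>
    #include <unordered_map>

    struct FoldEntry {
      unsigned MemOpcode; // opcode taking a memory operand instead of a register
      uint16_t Flags;     // e.g. alignment requirement or "cannot unfold" marker
    };

    // Populated once from static { RegOp, MemOp, Flags } triples like those above.
    static std::unordered_map<unsigned, FoldEntry> FoldTable;

    // Returns the memory-form opcode for RegOpcode, or 0 if it cannot be folded.
    unsigned lookupFold(unsigned RegOpcode) {
      auto It = FoldTable.find(RegOpcode);
      return It == FoldTable.end() ? 0 : It->second.MemOpcode;
    }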
@@ -2651,6 +3051,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable masked instructions
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
{ X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
{ X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
{ X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
{ X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
@@ -2659,6 +3061,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
{ X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
{ X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
@@ -2671,14 +3075,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
{ X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 },
+ { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 },
{ X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
{ X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
{ X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
{ X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
+ { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 },
+ { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 },
{ X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
+ { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
{ X86::VORPDZrrk, X86::VORPDZrmk, 0 },
{ X86::VORPSZrrk, X86::VORPSZrmk, 0 },
+ { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 },
+ { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 },
+ { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 },
+ { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 },
{ X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
{ X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
{ X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
@@ -2692,6 +3106,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
{ X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
{ X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
+ { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 },
+ { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 },
{ X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
{ X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
{ X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
@@ -2714,9 +3130,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
{ X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
{ X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
+ { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 },
+ { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 },
+ { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 },
+ { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 },
+ { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 },
+ { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 },
+ { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 },
+ { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 },
+ { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 },
+ { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 },
+ { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 },
+ { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 },
+ { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 },
+ { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 },
+ { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 },
+ { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 },
+ { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 },
+ { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 },
+ { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 },
+ { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 },
+ { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 },
{ X86::VPORDZrrk, X86::VPORDZrmk, 0 },
{ X86::VPORQZrrk, X86::VPORQZrmk, 0 },
{ X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
+ { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
+ { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
+ { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 },
+ { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 },
+ { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 },
+ { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 },
+ { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 },
+ { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 },
+ { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 },
+ { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 },
+ { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 },
+ { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 },
+ { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 },
+ { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 },
+ { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 },
+ { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 },
+ { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 },
+ { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 },
{ X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
{ X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
{ X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
@@ -2736,8 +3191,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
{ X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
{ X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
+ { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 },
+ { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
{ X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
{ X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
+ { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
{ X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
{ X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
{ X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
@@ -2772,6 +3231,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
{ X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
{ X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
+ { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 },
+ { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 },
+ { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 },
+ { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 },
{ X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
{ X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
{ X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
@@ -2785,6 +3248,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
{ X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
{ X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
+ { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 },
+ { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 },
{ X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
{ X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
{ X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
@@ -2807,9 +3272,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
{ X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
{ X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
+ { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 },
+ { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 },
+ { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 },
+ { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 },
+ { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 },
+ { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 },
+ { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 },
+ { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 },
+ { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 },
+ { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 },
+ { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 },
+ { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 },
+ { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 },
+ { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 },
+ { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 },
+ { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 },
+ { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 },
+ { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 },
+ { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 },
+ { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 },
+ { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 },
{ X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
{ X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
{ X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
+ { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
+ { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
+ { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 },
+ { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 },
+ { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 },
+ { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 },
+ { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 },
+ { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 },
+ { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 },
+ { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 },
+ { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 },
+ { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 },
+ { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 },
+ { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 },
+ { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 },
+ { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 },
+ { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 },
+ { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 },
{ X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
{ X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
{ X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
@@ -2830,6 +3334,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
{ X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
{ X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
+ { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 },
+ { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 },
{ X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
{ X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
{ X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
@@ -2862,6 +3368,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
{ X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
{ X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
+ { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 },
+ { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 },
+ { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 },
+ { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 },
{ X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
{ X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
{ X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
@@ -2875,6 +3385,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
{ X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
{ X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
+ { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 },
+ { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 },
{ X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
{ X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
{ X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
@@ -2893,9 +3405,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
{ X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
{ X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
+ { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 },
+ { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 },
+ { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 },
+ { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 },
+ { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 },
+ { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 },
+ { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 },
+ { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 },
+ { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 },
+ { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 },
+ { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 },
+ { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 },
+ { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 },
+ { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 },
+ { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 },
+ { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 },
+ { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 },
+ { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 },
+ { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 },
+ { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 },
+ { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 },
{ X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
{ X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
{ X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
+ { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
+ { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
+ { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 },
+ { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 },
+ { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 },
+ { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 },
+ { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 },
+ { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 },
+ { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 },
+ { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 },
+ { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 },
+ { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 },
+ { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 },
+ { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 },
+ { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 },
+ { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 },
+ { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 },
+ { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 },
{ X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
{ X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
{ X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
@@ -2916,6 +3467,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
{ X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
{ X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
+ { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 },
+ { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 },
{ X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
{ X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
{ X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
@@ -3063,18 +3616,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- if (MI.getOpcode() == getCallFrameSetupOpcode() ||
- MI.getOpcode() == getCallFrameDestroyOpcode()) {
+ if (isFrameInstr(MI)) {
unsigned StackAlign = TFI->getStackAlignment();
- int SPAdj =
- (MI.getOperand(0).getImm() + StackAlign - 1) / StackAlign * StackAlign;
-
- SPAdj -= MI.getOperand(1).getImm();
-
- if (MI.getOpcode() == getCallFrameSetupOpcode())
- return SPAdj;
- else
- return -SPAdj;
+ int SPAdj = alignTo(getFrameSize(MI), StackAlign);
+ SPAdj -= getFrameAdjustment(MI);
+ if (!isFrameSetup(MI))
+ SPAdj = -SPAdj;
+ return SPAdj;
}
// To know whether a call adjusts the stack, we need information
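The rewritten hunk above replaces the open-coded round-up and the explicit opcode checks with alignTo and the generic frame-instruction helpers. A hedged, standalone sketch of just the arithmetic (the helper names mirror the hunk but this function is illustrative, not the in-tree one):

    #include <cstdint>

    static uint64_t alignToVal(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align; // round up to a multiple of Align
    }

    // FrameSize is the CALLSEQ pseudo's first immediate, Adjustment its second;
    // IsSetup distinguishes the call-frame setup from the destroy pseudo.
    int64_t spAdjust(uint64_t FrameSize, int64_t Adjustment, bool IsSetup,
                     uint64_t StackAlign = 16) {
      int64_t SPAdj = alignToVal(FrameSize, StackAlign) - Adjustment;
      return IsSetup ? SPAdj : -SPAdj;
    }
    // e.g. spAdjust(20, 0, /*IsSetup=*/true) == 32 with 16-byte stack alignment.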
@@ -3569,7 +4117,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
const DebugLoc &DL = Orig.getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri))
- .addOperand(Orig.getOperand(0))
+ .add(Orig.getOperand(0))
.addImm(Value);
} else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
@@ -3654,10 +4202,10 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
NewSrc = MF.getRegInfo().createVirtualRegister(RC);
- MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
- get(TargetOpcode::COPY))
- .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
- .addOperand(Src);
+ MachineInstr *Copy =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+ .add(Src);
// Which is obviously going to be dead after we're done with it.
isKill = true;
@@ -3823,10 +4371,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
- .addOperand(Src)
+ .add(Src)
.addImm(0)
.addReg(0);
break;
@@ -3848,14 +4396,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
.addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
.addImm(0)
.addReg(0);
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = MIB;
break;
@@ -3869,10 +4417,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
- .addOperand(Src)
+ .add(Src)
.addImm(0)
.addReg(0);
break;
@@ -3891,11 +4439,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(SrcReg,
getKillRegState(isKill) | getUndefRegState(isUndef));
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = addOffset(MIB, 1);
break;
@@ -3905,10 +4453,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
- .addOperand(Src),
- 1);
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), 1);
break;
case X86::DEC64r:
case X86::DEC32r: {
@@ -3924,11 +4470,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(SrcReg, getUndefRegState(isUndef) |
getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = addOffset(MIB, -1);
@@ -3939,10 +4485,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
- .addOperand(Src),
- -1);
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), -1);
break;
case X86::ADD64rr:
case X86::ADD64rr_DB:
@@ -3970,12 +4514,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
return nullptr;
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), get(Opc)).addOperand(Dest);
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
if (ImplicitOp2.getReg() != 0)
- MIB.addOperand(ImplicitOp2);
+ MIB.add(ImplicitOp2);
NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
@@ -3995,9 +4538,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI.getOperand(2).getReg();
bool isKill2 = MI.getOperand(2).isKill();
- NewMI = addRegReg(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).addOperand(Dest),
- Src.getReg(), Src.isKill(), Src2, isKill2);
+ NewMI = addRegReg(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
// Preserve undefness of the operands.
bool isUndef = MI.getOperand(1).isUndef();
@@ -4014,10 +4556,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32_DB:
case X86::ADD64ri8_DB:
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
- .addOperand(Dest)
- .addOperand(Src),
- MI.getOperand(2));
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
+ MI.getOperand(2));
break;
case X86::ADD32ri:
case X86::ADD32ri8:
@@ -4034,11 +4575,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(SrcReg, getUndefRegState(isUndef) |
getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = addOffset(MIB, MI.getOperand(2));
break;
@@ -4051,12 +4592,136 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
- .addOperand(Src),
- MI.getOperand(2));
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src),
+ MI.getOperand(2));
+ break;
+
+ case X86::VMOVDQU8Z128rmk:
+ case X86::VMOVDQU8Z256rmk:
+ case X86::VMOVDQU8Zrmk:
+ case X86::VMOVDQU16Z128rmk:
+ case X86::VMOVDQU16Z256rmk:
+ case X86::VMOVDQU16Zrmk:
+ case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
+ case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
+ case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
+ case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
+ case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
+ case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
+ case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
+ case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
+ case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
+ case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
+ case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
+ case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
+ unsigned Opc;
+ switch (MIOpc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
+ case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
+ case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
+ case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
+ case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
+ case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
+ case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ }
+
+ NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .add(MI.getOperand(2))
+ .add(Src)
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6))
+ .add(MI.getOperand(7));
break;
}
+ case X86::VMOVDQU8Z128rrk:
+ case X86::VMOVDQU8Z256rrk:
+ case X86::VMOVDQU8Zrrk:
+ case X86::VMOVDQU16Z128rrk:
+ case X86::VMOVDQU16Z256rrk:
+ case X86::VMOVDQU16Zrrk:
+ case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
+ case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
+ case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
+ case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
+ case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
+ case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
+ case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
+ case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
+ case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
+ case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
+ case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
+ case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
+ unsigned Opc;
+ switch (MIOpc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
+ case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
+ case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
+ case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
+ case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
+ case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
+ case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
+ case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
+ case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
+ case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
+ case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
+ case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
+ case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
+ case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
+ case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
+ case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
+ case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
+ case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
+ case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
+ case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
+ case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
+ case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
+ case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
+ case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
+ case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
+ case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
+ case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
+ case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
+ case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
+ case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
+ }
+
+ NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .add(MI.getOperand(2))
+ .add(Src)
+ .add(MI.getOperand(3));
+ break;
+ }
+ }
if (!NewMI) return nullptr;
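The two new convertToThreeAddress cases above rewrite a merging masked vector move into the corresponding BLENDM instruction, which takes the pass-through value as an ordinary source operand rather than a tied destination. Element-wise the two operations compute the same thing; a small plain-C++ sketch of that semantics (illustrative only, no intrinsics assumed):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // result[i] = mask bit i ? src[i] : passthru[i] -- what both a merging masked
    // move and a BLENDM compute; only the operand encoding differs.
    template <typename T, std::size_t N>
    std::array<T, N> maskBlend(uint16_t Mask, const std::array<T, N> &Src,
                               const std::array<T, N> &Passthru) {
      std::array<T, N> Result;
      for (std::size_t I = 0; I < N; ++I)
        Result[I] = ((Mask >> I) & 1) ? Src[I] : Passthru[I];
      return Result;
    }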
@@ -4337,6 +5002,18 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::PFSUBrr:
+ case X86::PFSUBRrr: {
+ // PFSUB x, y: x = x - y
+ // PFSUBR x, y: x = y - x
+ unsigned Opc =
+ (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ break;
+ }
case X86::BLENDPDrri:
case X86::BLENDPSrri:
case X86::PBLENDWrri:
@@ -4606,18 +5283,30 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
- case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
- case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
- case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
- case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
- case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
- case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
+ case X86::VPTERNLOGDZrrik:
+ case X86::VPTERNLOGDZ128rrik:
+ case X86::VPTERNLOGDZ256rrik:
+ case X86::VPTERNLOGQZrrik:
+ case X86::VPTERNLOGQZ128rrik:
+ case X86::VPTERNLOGQZ256rrik:
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
- case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: {
+ case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
+ case X86::VPTERNLOGDZ128rmbi:
+ case X86::VPTERNLOGDZ256rmbi:
+ case X86::VPTERNLOGDZrmbi:
+ case X86::VPTERNLOGQZ128rmbi:
+ case X86::VPTERNLOGQZ256rmbi:
+ case X86::VPTERNLOGQZrmbi:
+ case X86::VPTERNLOGDZ128rmbikz:
+ case X86::VPTERNLOGDZ256rmbikz:
+ case X86::VPTERNLOGDZrmbikz:
+ case X86::VPTERNLOGQZ128rmbikz:
+ case X86::VPTERNLOGQZ256rmbikz:
+ case X86::VPTERNLOGQZrmbikz: {
auto &WorkingMI = cloneIfNew(MI);
if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2))
return nullptr;
@@ -4798,18 +5487,30 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
- case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
- case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
- case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
- case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
- case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
- case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
+ case X86::VPTERNLOGDZrrik:
+ case X86::VPTERNLOGDZ128rrik:
+ case X86::VPTERNLOGDZ256rrik:
+ case X86::VPTERNLOGQZrrik:
+ case X86::VPTERNLOGQZ128rrik:
+ case X86::VPTERNLOGQZ256rrik:
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
+ case X86::VPTERNLOGDZ128rmbi:
+ case X86::VPTERNLOGDZ256rmbi:
+ case X86::VPTERNLOGDZrmbi:
+ case X86::VPTERNLOGQZ128rmbi:
+ case X86::VPTERNLOGQZ256rmbi:
+ case X86::VPTERNLOGQZrmbi:
+ case X86::VPTERNLOGDZ128rmbikz:
+ case X86::VPTERNLOGDZ256rmbikz:
+ case X86::VPTERNLOGDZrmbikz:
+ case X86::VPTERNLOGQZ128rmbikz:
+ case X86::VPTERNLOGQZ256rmbikz:
+ case X86::VPTERNLOGQZrmbikz:
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
default:
const X86InstrFMA3Group *FMA3Group =
@@ -5108,6 +5809,95 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
return !isPredicated(MI);
}
+bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool X86InstrInfo::canMakeTailCallConditional(
+ SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ if (TailCall.getOpcode() != X86::TCRETURNdi &&
+ TailCall.getOpcode() != X86::TCRETURNdi64) {
+ // Only direct calls can be done with a conditional branch.
+ return false;
+ }
+
+ const MachineFunction *MF = TailCall.getParent()->getParent();
+ if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
+ // Conditional tail calls confuse the Win64 unwinder.
+ return false;
+ }
+
+ assert(BranchCond.size() == 1);
+ if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
+ // Can't make a conditional tail call with this condition.
+ return false;
+ }
+
+ const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ if (X86FI->getTCReturnAddrDelta() != 0 ||
+ TailCall.getOperand(1).getImm() != 0) {
+ // A conditional tail call cannot do any stack adjustment.
+ return false;
+ }
+
+ return true;
+}
+
+void X86InstrInfo::replaceBranchWithTailCall(
+ MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ assert(canMakeTailCallConditional(BranchCond, TailCall));
+
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (!I->isBranch())
+ assert(0 && "Can't find the branch to replace!");
+
+ X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
+ assert(BranchCond.size() == 1);
+ if (CC != BranchCond[0].getImm())
+ continue;
+
+ break;
+ }
+
+ unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
+ : X86::TCRETURNdi64cc;
+
+ auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
+ MIB->addOperand(TailCall.getOperand(0)); // Destination.
+ MIB.addImm(0); // Stack offset (not used).
+ MIB->addOperand(BranchCond[0]); // Condition.
+ MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
+
+ // Add implicit uses and defs of all live regs potentially clobbered by the
+ // call. This way they still appear live across the call.
+ LivePhysRegs LiveRegs(&getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers;
+ LiveRegs.stepForward(*MIB, Clobbers);
+ for (const auto &C : Clobbers) {
+ MIB.addReg(C.first, RegState::Implicit);
+ MIB.addReg(C.first, RegState::Implicit | RegState::Define);
+ }
+
+ I->eraseFromParent();
+}
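Taken together, isUnconditionalTailCall, canMakeTailCallConditional and replaceBranchWithTailCall let the generic branch-folding code turn a conditional branch whose target does nothing but tail-call into a single conditional tail call. A rough before/after sketch at the assembly level; the label and callee name are made up for illustration:

    // Before: a conditional branch to a block that only tail-calls.
    //     testl %edi, %edi
    //     je    .LBB0_1
    //     ...
    //   .LBB0_1:
    //     jmp   callee            # TCRETURNdi
    //
    // After: the branch itself becomes the conditional tail call and the
    // jmp-only block can be removed.
    //     testl %edi, %edi
    //     je    callee            # TCRETURNdicc
    //     ...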
+
// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
// not be a fallthrough MBB now due to layout changes). Return nullptr if the
// fallthrough MBB cannot be identified.
@@ -5514,8 +6304,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
// SrcReg(MaskReg) -> DestReg(GR64)
// SrcReg(MaskReg) -> DestReg(GR32)
- // SrcReg(MaskReg) -> DestReg(GR16)
- // SrcReg(MaskReg) -> DestReg(GR8)
 // All KMASK RegClasses hold the same k registers, so the check can be done against any one of them.
if (X86::VK16RegClass.contains(SrcReg)) {
@@ -5525,20 +6313,10 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
}
if (X86::GR32RegClass.contains(DestReg))
return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
- if (X86::GR16RegClass.contains(DestReg)) {
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return X86::KMOVWrk;
- }
- if (X86::GR8RegClass.contains(DestReg)) {
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
- }
}
// SrcReg(GR64) -> DestReg(MaskReg)
// SrcReg(GR32) -> DestReg(MaskReg)
- // SrcReg(GR16) -> DestReg(MaskReg)
- // SrcReg(GR8) -> DestReg(MaskReg)
 // All KMASK RegClasses hold the same k registers, so the check can be done against any one of them.
if (X86::VK16RegClass.contains(DestReg)) {
@@ -5548,14 +6326,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
}
if (X86::GR32RegClass.contains(SrcReg))
return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
- if (X86::GR16RegClass.contains(SrcReg)) {
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return X86::KMOVWkr;
- }
- if (X86::GR8RegClass.contains(SrcReg)) {
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
- }
}
@@ -5965,7 +6735,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
+ MIB.add(Addr[i]);
MIB.addReg(SrcReg, getKillRegState(isKill));
(*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
@@ -6000,7 +6770,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
+ MIB.add(Addr[i]);
(*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
}
@@ -6017,12 +6787,14 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP8ri:
- if (!MI.getOperand(1).isImm())
- return false;
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
- CmpMask = ~0;
- CmpValue = MI.getOperand(1).getImm();
+ if (MI.getOperand(1).isImm()) {
+ CmpMask = ~0;
+ CmpValue = MI.getOperand(1).getImm();
+ } else {
+ CmpMask = CmpValue = 0;
+ }
return true;
// A SUB can be used to perform comparison.
case X86::SUB64rm:
@@ -6031,7 +6803,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::SUB8rm:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
- CmpMask = ~0;
+ CmpMask = 0;
CmpValue = 0;
return true;
case X86::SUB64rr:
@@ -6040,7 +6812,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::SUB8rr:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = MI.getOperand(2).getReg();
- CmpMask = ~0;
+ CmpMask = 0;
CmpValue = 0;
return true;
case X86::SUB64ri32:
@@ -6050,12 +6822,14 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB8ri:
- if (!MI.getOperand(2).isImm())
- return false;
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
- CmpMask = ~0;
- CmpValue = MI.getOperand(2).getImm();
+ if (MI.getOperand(2).isImm()) {
+ CmpMask = ~0;
+ CmpValue = MI.getOperand(2).getImm();
+ } else {
+ CmpMask = CmpValue = 0;
+ }
return true;
case X86::CMP64rr:
case X86::CMP32rr:
@@ -6063,7 +6837,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::CMP8rr:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
- CmpMask = ~0;
+ CmpMask = 0;
CmpValue = 0;
return true;
case X86::TEST8rr:
@@ -6089,8 +6863,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
- unsigned SrcReg2, int ImmValue,
- MachineInstr &OI) {
+ unsigned SrcReg2, int ImmMask,
+ int ImmValue, MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
@@ -6101,7 +6875,8 @@ inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
OI.getOperand(2).getReg() == SrcReg)))
return true;
- if (((FlagI.getOpcode() == X86::CMP64ri32 &&
+ if (ImmMask != 0 &&
+ ((FlagI.getOpcode() == X86::CMP64ri32 &&
OI.getOpcode() == X86::SUB64ri32) ||
(FlagI.getOpcode() == X86::CMP64ri8 &&
OI.getOpcode() == X86::SUB64ri8) ||
@@ -6288,7 +7063,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// If we are comparing against zero, check whether we can use MI to update
// EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
- bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
+ bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
return false;
@@ -6338,8 +7113,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
for (; RI != RE; ++RI) {
MachineInstr &Instr = *RI;
// Check whether CmpInstr can be made redundant by the current instruction.
- if (!IsCmpZero &&
- isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
+ if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
+ CmpValue, Instr)) {
Sub = &Instr;
break;
}
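To summarize the new CmpMask convention used across these hunks (a restatement for the reader, not text from the patch):

    // analyzeCompare now reports:
    //   CmpMask == ~0, CmpValue == Imm : compare/sub against a known immediate
    //   CmpMask == 0,  CmpValue == 0   : no usable immediate (register/memory
    //                                    forms, or a symbolic immediate operand)
    // Downstream, isRedundantFlagInstr only matches the CMPri/SUBri pairs when
    // ImmMask != 0, and optimizeCompareInstr only treats the instruction as a
    // compare against zero when CmpMask != 0.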
@@ -6764,14 +7539,33 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
case X86::AVX512_128_SET0:
- return Expand2AddrUndef(MIB, get(X86::VPXORDZ128rr));
- case X86::AVX512_256_SET0:
- return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
+ case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0SD: {
+ bool HasVLX = Subtarget.hasVLX();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
+ return Expand2AddrUndef(MIB,
+ get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
+ // Extended register without VLX. Use a larger XOR.
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(SrcReg);
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ }
+ case X86::AVX512_256_SET0: {
+ bool HasVLX = Subtarget.hasVLX();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
+ return Expand2AddrUndef(MIB,
+ get(HasVLX ? X86::VPXORDZ256rr : X86::VXORPSYrr));
+ // Extended register without VLX. Use a larger XOR.
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(SrcReg);
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ }
case X86::AVX512_512_SET0:
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
- case X86::AVX512_FsFLD0SS:
- case X86::AVX512_FsFLD0SD:
- return Expand2AddrUndef(MIB, get(X86::VXORPSZ128rr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
@@ -6838,11 +7632,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// registers, since it is not usable as a write mask.
// FIXME: A more advanced approach would be to choose the best input mask
// register based on context.
- case X86::KSET0B:
case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
- case X86::KSET1B:
case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
@@ -6860,7 +7652,7 @@ static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
if (NumAddrOps < 4) {
   // FrameIndex only - add an immediate offset (whether it's zero or not).
for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
+ MIB.add(MOs[i]);
addOffset(MIB, PtrOffset);
} else {
// General Memory Addressing - we need to add any offset to an existing
@@ -6871,7 +7663,7 @@ static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
if (i == 3 && PtrOffset != 0) {
MIB.addDisp(MO, PtrOffset);
} else {
- MIB.addOperand(MO);
+ MIB.add(MO);
}
}
}
@@ -6893,11 +7685,11 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
unsigned NumOps = MI.getDesc().getNumOperands() - 2;
for (unsigned i = 0; i != NumOps; ++i) {
MachineOperand &MO = MI.getOperand(i + 2);
- MIB.addOperand(MO);
+ MIB.add(MO);
}
for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- MIB.addOperand(MO);
+ MIB.add(MO);
}
MachineBasicBlock *MBB = InsertPt->getParent();
@@ -6922,7 +7714,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
assert(MO.isReg() && "Expected to fold into reg operand!");
addOperands(MIB, MOs, PtrOffset);
} else {
- MIB.addOperand(MO);
+ MIB.add(MO);
}
}
@@ -7226,7 +8018,7 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
return false;
}
-/// Inform the ExeDepsFix pass how many idle
+/// Inform the ExecutionDepsFix pass how many idle
/// instructions we would like before a partial register update.
unsigned X86InstrInfo::getPartialRegUpdateClearance(
const MachineInstr &MI, unsigned OpNum,
@@ -7344,11 +8136,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
case X86::VCVTSD2SSZrr:
- case X86::VCVTSD2SSZrrb:
+ case X86::VCVTSD2SSZrr_Int:
+ case X86::VCVTSD2SSZrrb_Int:
case X86::VCVTSD2SSZrm:
+ case X86::VCVTSD2SSZrm_Int:
case X86::VCVTSS2SDZrr:
- case X86::VCVTSS2SDZrrb:
+ case X86::VCVTSS2SDZrr_Int:
+ case X86::VCVTSS2SDZrrb_Int:
case X86::VCVTSS2SDZrm:
+ case X86::VCVTSS2SDZrm_Int:
case X86::VRNDSCALESDr:
case X86::VRNDSCALESDrb:
case X86::VRNDSCALESDm:
@@ -7375,8 +8171,8 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
return false;
}
-/// Inform the ExeDepsFix pass how many idle instructions we would like before
-/// certain undef register reads.
+/// Inform the ExecutionDepsFix pass how many idle instructions we would like
+/// before certain undef register reads.
///
/// This catches the VCVTSI2SD family of instructions:
///
@@ -7522,6 +8318,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
+ case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
+ case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
+ case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
+ case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
+ case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
+ case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
@@ -7536,6 +8338,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
+ case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
+ case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
+ case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
+ case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
+ case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
+ case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
+ case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
+ case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
+ case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
+ case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
+ case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
+ case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
return false;
default:
return true;
@@ -7555,6 +8369,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
+ case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
+ case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
+ case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
+ case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
+ case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
+ case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
@@ -7569,6 +8389,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
+ case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
+ case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
+ case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
+ case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
+ case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
+ case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
+ case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
+ case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
+ case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
+ case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
+ case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
+ case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
return false;
default:
return true;
@@ -7800,11 +8632,11 @@ bool X86InstrInfo::unfoldMemoryOperand(
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
for (MachineOperand &BeforeOp : BeforeOps)
- MIB.addOperand(BeforeOp);
+ MIB.add(BeforeOp);
if (FoldedLoad)
MIB.addReg(Reg);
for (MachineOperand &AfterOp : AfterOps)
- MIB.addOperand(AfterOp);
+ MIB.add(AfterOp);
for (MachineOperand &ImpOp : ImpOps) {
MIB.addReg(ImpOp.getReg(),
getDefRegState(ImpOp.isDef()) |
@@ -8143,28 +8975,29 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
break;
}
- // Check if chain operands and base addresses match.
- if (Load1->getOperand(0) != Load2->getOperand(0) ||
- Load1->getOperand(5) != Load2->getOperand(5))
+ // Lambda to check whether both loads have the same value for a given operand index.
+ auto HasSameOp = [&](int I) {
+ return Load1->getOperand(I) == Load2->getOperand(I);
+ };
+
+ // All operands except the displacement should match.
+ if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) ||
+ !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg))
return false;
- // Segment operands should match as well.
- if (Load1->getOperand(4) != Load2->getOperand(4))
+
+ // The chain operand must match as well.
+ if (!HasSameOp(5))
return false;
- // Scale should be 1, Index should be Reg0.
- if (Load1->getOperand(1) == Load2->getOperand(1) &&
- Load1->getOperand(2) == Load2->getOperand(2)) {
- if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
- return false;
- // Now let's examine the displacements.
- if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
- isa<ConstantSDNode>(Load2->getOperand(3))) {
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
- Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
- return true;
- }
- }
- return false;
+ // Now let's examine if the displacements are constants.
+ auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp));
+ auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp));
+ if (!Disp1 || !Disp2)
+ return false;
+
+ Offset1 = Disp1->getSExtValue();
+ Offset2 = Disp2->getSExtValue();
+ return true;
}
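For reference, the operand indices compared above follow the standard five-operand x86 memory reference (the X86::Addr* values from X86BaseInfo.h), with the chain operand following it on these load nodes; only the displacement is allowed to differ:

    //   Operand(X86::AddrBaseReg)    (0)  base register     must match
    //   Operand(X86::AddrScaleAmt)   (1)  scale immediate   must match
    //   Operand(X86::AddrIndexReg)   (2)  index register    must match
    //   Operand(X86::AddrDisp)       (3)  displacement      may differ -> Offset1/Offset2
    //   Operand(X86::AddrSegmentReg) (4)  segment register  must match
    //   Operand(5)                        chain             must match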
bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
@@ -8215,165 +9048,6 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
return true;
}
-bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First,
- const MachineInstr &Second) const {
- // Check if this processor supports macro-fusion. Since this is a minor
- // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
- // proxy for SandyBridge+.
- if (!Subtarget.hasAVX())
- return false;
-
- enum {
- FuseTest,
- FuseCmp,
- FuseInc
- } FuseKind;
-
- switch (Second.getOpcode()) {
- default:
- return false;
- case X86::JE_1:
- case X86::JNE_1:
- case X86::JL_1:
- case X86::JLE_1:
- case X86::JG_1:
- case X86::JGE_1:
- FuseKind = FuseInc;
- break;
- case X86::JB_1:
- case X86::JBE_1:
- case X86::JA_1:
- case X86::JAE_1:
- FuseKind = FuseCmp;
- break;
- case X86::JS_1:
- case X86::JNS_1:
- case X86::JP_1:
- case X86::JNP_1:
- case X86::JO_1:
- case X86::JNO_1:
- FuseKind = FuseTest;
- break;
- }
- switch (First.getOpcode()) {
- default:
- return false;
- case X86::TEST8rr:
- case X86::TEST16rr:
- case X86::TEST32rr:
- case X86::TEST64rr:
- case X86::TEST8ri:
- case X86::TEST16ri:
- case X86::TEST32ri:
- case X86::TEST32i32:
- case X86::TEST64i32:
- case X86::TEST64ri32:
- case X86::TEST8rm:
- case X86::TEST16rm:
- case X86::TEST32rm:
- case X86::TEST64rm:
- case X86::TEST8ri_NOREX:
- case X86::AND16i16:
- case X86::AND16ri:
- case X86::AND16ri8:
- case X86::AND16rm:
- case X86::AND16rr:
- case X86::AND32i32:
- case X86::AND32ri:
- case X86::AND32ri8:
- case X86::AND32rm:
- case X86::AND32rr:
- case X86::AND64i32:
- case X86::AND64ri32:
- case X86::AND64ri8:
- case X86::AND64rm:
- case X86::AND64rr:
- case X86::AND8i8:
- case X86::AND8ri:
- case X86::AND8rm:
- case X86::AND8rr:
- return true;
- case X86::CMP16i16:
- case X86::CMP16ri:
- case X86::CMP16ri8:
- case X86::CMP16rm:
- case X86::CMP16rr:
- case X86::CMP32i32:
- case X86::CMP32ri:
- case X86::CMP32ri8:
- case X86::CMP32rm:
- case X86::CMP32rr:
- case X86::CMP64i32:
- case X86::CMP64ri32:
- case X86::CMP64ri8:
- case X86::CMP64rm:
- case X86::CMP64rr:
- case X86::CMP8i8:
- case X86::CMP8ri:
- case X86::CMP8rm:
- case X86::CMP8rr:
- case X86::ADD16i16:
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD16ri8_DB:
- case X86::ADD16ri_DB:
- case X86::ADD16rm:
- case X86::ADD16rr:
- case X86::ADD16rr_DB:
- case X86::ADD32i32:
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD32ri8_DB:
- case X86::ADD32ri_DB:
- case X86::ADD32rm:
- case X86::ADD32rr:
- case X86::ADD32rr_DB:
- case X86::ADD64i32:
- case X86::ADD64ri32:
- case X86::ADD64ri32_DB:
- case X86::ADD64ri8:
- case X86::ADD64ri8_DB:
- case X86::ADD64rm:
- case X86::ADD64rr:
- case X86::ADD64rr_DB:
- case X86::ADD8i8:
- case X86::ADD8mi:
- case X86::ADD8mr:
- case X86::ADD8ri:
- case X86::ADD8rm:
- case X86::ADD8rr:
- case X86::SUB16i16:
- case X86::SUB16ri:
- case X86::SUB16ri8:
- case X86::SUB16rm:
- case X86::SUB16rr:
- case X86::SUB32i32:
- case X86::SUB32ri:
- case X86::SUB32ri8:
- case X86::SUB32rm:
- case X86::SUB32rr:
- case X86::SUB64i32:
- case X86::SUB64ri32:
- case X86::SUB64ri8:
- case X86::SUB64rm:
- case X86::SUB64rr:
- case X86::SUB8i8:
- case X86::SUB8ri:
- case X86::SUB8rm:
- case X86::SUB8rr:
- return FuseKind == FuseCmp || FuseKind == FuseInc;
- case X86::INC16r:
- case X86::INC32r:
- case X86::INC64r:
- case X86::INC8r:
- case X86::DEC16r:
- case X86::DEC32r:
- case X86::DEC64r:
- case X86::DEC8r:
- return FuseKind == FuseInc;
- }
-}
-
bool X86InstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
@@ -8424,6 +9098,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
{ X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
+ { X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr },
{ X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
{ X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
{ X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
@@ -8443,6 +9118,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
{ X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
+ { X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr },
{ X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
{ X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
{ X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
@@ -8465,7 +9141,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
// AVX512 support
{ X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
{ X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
- { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
+ { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr },
{ X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
{ X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
{ X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
@@ -8493,10 +9169,6 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
{ X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
- { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
- { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
- { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
- { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
{ X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
{ X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
{ X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
@@ -8508,6 +9180,14 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
};
+static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+};
+
static const uint16_t ReplaceableInstrsAVX512[][4] = {
// Two integer columns for 64-bit and 32-bit elements.
//PackedSingle PackedDouble PackedInt PackedInt
@@ -8769,16 +9449,25 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
validDomains = 0xe;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+ } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
+ // Insert/extract instructions should only affect the domain if AVX2
+ // is enabled.
+ if (!Subtarget.hasAVX2())
+ return std::make_pair(0, 0);
+ validDomains = 0xe;
} else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
validDomains = 0xe;
- } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
- validDomains = Subtarget.hasDQI() ? 0xe : 0x8;
- } else if (const uint16_t *table = lookupAVX512(opcode, domain,
+ } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
+ ReplaceableInstrsAVX512DQ)) {
+ validDomains = 0xe;
+ } else if (Subtarget.hasDQI()) {
+ if (const uint16_t *table = lookupAVX512(opcode, domain,
ReplaceableInstrsAVX512DQMasked)) {
- if (domain == 1 || (domain == 3 && table[3] == opcode))
- validDomains = Subtarget.hasDQI() ? 0xa : 0x8;
- else
- validDomains = Subtarget.hasDQI() ? 0xc : 0x8;
+ if (domain == 1 || (domain == 3 && table[3] == opcode))
+ validDomains = 0xa;
+ else
+ validDomains = 0xc;
+ }
}
}
return std::make_pair(domain, validDomains);
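The validDomains values above are bitmasks over the execution-domain numbering (bit n set means a form exists in domain n); spelled out for the cases used here:

    //   domain 1 = SSEPackedSingle, 2 = SSEPackedDouble, 3 = SSEPackedInt
    //   0xe = 0b1110 : single, double and integer forms all available
    //   0x6 = 0b0110 : single and double only (no AVX2 integer form)
    //   0xa = 0b1010 : single and integer only (masked DQ case)
    //   0xc = 0b1100 : double and integer only (masked DQ case)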
@@ -8794,6 +9483,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
"256-bit vector operations only available in AVX2");
table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
}
+ if (!table) { // try the other table
+ assert(Subtarget.hasAVX2() &&
+ "256-bit insert/extract only available in AVX2");
+ table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
+ }
if (!table) { // try the AVX512 table
assert(Subtarget.hasAVX512() && "Requires AVX-512");
table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
@@ -9457,28 +10151,6 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
-bool X86InstrInfo::isTailCall(const MachineInstr &Inst) const {
- switch (Inst.getOpcode()) {
- case X86::TCRETURNdi:
- case X86::TCRETURNmi:
- case X86::TCRETURNri:
- case X86::TCRETURNdi64:
- case X86::TCRETURNmi64:
- case X86::TCRETURNri64:
- case X86::TAILJMPd:
- case X86::TAILJMPm:
- case X86::TAILJMPr:
- case X86::TAILJMPd64:
- case X86::TAILJMPm64:
- case X86::TAILJMPr64:
- case X86::TAILJMPm64_REX:
- case X86::TAILJMPr64_REX:
- return true;
- default:
- return false;
- }
-}
-
namespace {
/// Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -9665,3 +10337,124 @@ namespace {
char LDTLSCleanup::ID = 0;
FunctionPass*
llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
+
+unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize,
+ size_t Occurrences,
+ bool CanBeTailCall) const {
+ unsigned NotOutlinedSize = SequenceSize * Occurrences;
+ unsigned OutlinedSize;
+
+ // Is it a tail call?
+ if (CanBeTailCall) {
+ // If yes, we don't have to include a return instruction-- it's already in
+ // our sequence. So we have one occurrence of the sequence + #Occurrences
+ // calls.
+ OutlinedSize = SequenceSize + Occurrences;
+ } else {
+ // If not, add one for the return instruction.
+ OutlinedSize = (SequenceSize + 1) + Occurrences;
+ }
+
+ // Return the number of instructions saved by outlining this sequence.
+ return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+}
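A worked example of the cost model above, using made-up numbers:

    //   SequenceSize = 10, Occurrences = 3
    //   NotOutlinedSize = 10 * 3 = 30
    //   Not a tail call: OutlinedSize = (10 + 1) + 3 = 14, benefit = 30 - 14 = 16
    //   Tail call:       OutlinedSize =  10      + 3 = 13, benefit = 30 - 13 = 17
    // The benefit is measured in instructions saved and clamps to zero when
    // outlining would not shrink the program.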
+
+bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
+ return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+}
+
+X86GenInstrInfo::MachineOutlinerInstrType
+X86InstrInfo::getOutliningType(MachineInstr &MI) const {
+
+ // Don't allow debug values to impact outlining type.
+ if (MI.isDebugValue() || MI.isIndirectDebugValue())
+ return MachineOutlinerInstrType::Invisible;
+
+ // Is this a tail call? If yes, we can outline as a tail call.
+ if (isTailCall(MI))
+ return MachineOutlinerInstrType::Legal;
+
+ // Is this the terminator of a basic block?
+ if (MI.isTerminator() || MI.isReturn()) {
+
+ // Does its parent have any successors in its MachineFunction?
+ if (MI.getParent()->succ_empty())
+ return MachineOutlinerInstrType::Legal;
+
+ // It does, so we can't tail call it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ // Don't outline anything that modifies or reads from the stack pointer.
+ //
+ // FIXME: There are instructions which are being manually built without
+ // explicit uses/defs so we also have to check the MCInstrDesc. We should be
+ // able to remove the extra checks once those are fixed up. For example,
+ // sometimes we might get something like %RAX<def> = POP64r 1. This won't be
+ // caught by modifiesRegister or readsRegister even though the instruction
+ // really ought to be formed so that modifiesRegister/readsRegister would
+ // catch it.
+ if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
+ MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
+ MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Outlined calls change the instruction pointer, so don't read from it.
+ if (MI.readsRegister(X86::RIP, &RI) ||
+ MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
+ MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Positions can't safely be outlined.
+ if (MI.isPosition())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Make sure none of the operands of this instruction do anything tricky.
+ for (const MachineOperand &MOP : MI.operands())
+ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+ MOP.isTargetIndex())
+ return MachineOutlinerInstrType::Illegal;
+
+ return MachineOutlinerInstrType::Legal;
+}
+
+void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+
+ // If we're a tail call, we already have a return, so don't do anything.
+ if (IsTailCall)
+ return;
+
+ // We're a normal call, so our sequence doesn't have a return instruction.
+ // Add it in.
+ MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ));
+ MBB.insert(MBB.end(), retq);
+}
+
+void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+ return;
+}
+
+MachineBasicBlock::iterator
+X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+ // Is it a tail call?
+ if (IsTailCall) {
+ // Yes, just insert a JMP.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(X86::JMP_1))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ } else {
+ // No, insert a call.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ }
+
+ return It;
+}
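These hooks are driven by the target-independent MachineOutliner; roughly, every occurrence of a repeated sequence is rewritten to a call and the sequence is emitted once as a new function (the symbol name below is illustrative):

    // Each occurrence becomes either
    //     callq OUTLINED_FUNCTION_0      # CALL64pcrel32, normal case
    // or, when the sequence already ends its function,
    //     jmp   OUTLINED_FUNCTION_0      # JMP_1, tail-call case
    // and the outlined body gets a trailing retq from insertOutlinerEpilogue
    // in the non-tail-call case; insertOutlinerPrologue adds nothing on x86.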
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index acfdef4da7a3..2fee48570ce1 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -182,6 +182,20 @@ public:
///
const X86RegisterInfo &getRegisterInfo() const { return RI; }
+ /// Returns the stack pointer adjustment that happens inside the frame
+ /// setup..destroy sequence (e.g. by pushes, or inside the callee).
+ int64_t getFrameAdjustment(const MachineInstr &I) const {
+ assert(isFrameInstr(I));
+ return I.getOperand(1).getImm();
+ }
+
+ /// Sets the stack pointer adjustment made inside the frame made up by this
+ /// instruction.
+ void setFrameAdjustment(MachineInstr &I, int64_t V) const {
+ assert(isFrameInstr(I));
+ I.getOperand(1).setImm(V);
+ }
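A sketch of how a pass might use these accessors; the loop and variable names are hypothetical, not part of this patch:

    //   int64_t Total = 0;
    //   for (MachineInstr &MI : MBB)
    //     if (TII->isFrameInstr(MI))               // frame setup/destroy pseudos
    //       Total += TII->getFrameAdjustment(MI);  // adjustment kept in operand 1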
+
/// getSPAdjust - This returns the stack pointer adjustment made by
/// this instruction. For x86, we need to handle more complex call
/// sequences involving PUSHes.
@@ -316,6 +330,13 @@ public:
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
+ bool isUnconditionalTailCall(const MachineInstr &MI) const override;
+ bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+ void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -436,9 +457,6 @@ public:
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(const MachineInstr &First,
- const MachineInstr &Second) const override;
-
void getNoopForMachoTarget(MCInst &NopInst) const override;
bool
@@ -539,8 +557,28 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
- bool isTailCall(const MachineInstr &Inst) const override;
+ unsigned getOutliningBenefit(size_t SequenceSize,
+ size_t Occurrences,
+ bool CanBeTailCall) const override;
+
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
+
+ llvm::X86GenInstrInfo::MachineOutlinerInstrType
+ getOutliningType(MachineInstr &MI) const override;
+
+ void insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+
+ void insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool isTailCall) const override;
+ MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 38036715a25a..e31d2769047b 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -318,6 +318,7 @@ let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
def X86Mem128_RC256XOperand : AsmOperandClass { let Name = "Mem128_RC256X"; }
def X86Mem256_RC256XOperand : AsmOperandClass { let Name = "Mem256_RC256X"; }
def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
+ def X86Mem256_RC512Operand : AsmOperandClass { let Name = "Mem256_RC512"; }
def X86Mem512_RC512Operand : AsmOperandClass { let Name = "Mem512_RC512"; }
}
@@ -374,9 +375,10 @@ def vy256mem : X86VMemOperand<VR256, "printi256mem", X86Mem256_RC256Operand>;
def vx64xmem : X86VMemOperand<VR128X, "printi64mem", X86Mem64_RC128XOperand>;
def vx128xmem : X86VMemOperand<VR128X, "printi128mem", X86Mem128_RC128XOperand>;
def vx256xmem : X86VMemOperand<VR128X, "printi256mem", X86Mem256_RC128XOperand>;
-def vy128xmem : X86VMemOperand<VR256, "printi128mem", X86Mem128_RC256XOperand>;
+def vy128xmem : X86VMemOperand<VR256X, "printi128mem", X86Mem128_RC256XOperand>;
def vy256xmem : X86VMemOperand<VR256X, "printi256mem", X86Mem256_RC256XOperand>;
def vy512mem : X86VMemOperand<VR256X, "printi512mem", X86Mem512_RC256XOperand>;
+def vz256xmem : X86VMemOperand<VR512, "printi256mem", X86Mem256_RC512Operand>;
def vz512mem : X86VMemOperand<VR512, "printi512mem", X86Mem512_RC512Operand>;
// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
@@ -831,7 +833,6 @@ def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
def HasFMA : Predicate<"Subtarget->hasFMA()">;
-def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasTBM : Predicate<"Subtarget->hasTBM()">;
@@ -848,8 +849,6 @@ def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
-def HasHLE : Predicate<"Subtarget->hasHLE()">;
-def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
@@ -857,9 +856,11 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
+def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasMPX : Predicate<"Subtarget->hasMPX()">;
+def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
@@ -895,6 +896,7 @@ def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
+def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
//===----------------------------------------------------------------------===//
@@ -931,6 +933,15 @@ def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
+// FIXME: Ideally we would just replace the above i*immSExt* matchers with
+// relocImm-based matchers, but then FastISel would be unable to use them.
+def i64relocImmSExt8 : PatLeaf<(i64 relocImm), [{
+ return isSExtRelocImm<8>(N);
+}]>;
+def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
+ return isSExtRelocImm<32>(N);
+}]>;
+
// If we have multiple users of an immediate, it's much smaller to reuse
// the register, rather than encode the immediate in every instruction.
// This has the risk of increasing register pressure from stretched live
@@ -971,6 +982,13 @@ def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
+def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
// unsigned field.
def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
@@ -1106,13 +1124,15 @@ def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>;
def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG>, OpSize16;
-def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [],
- IIC_POP_MEM>, OpSize16;
def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, SchedRW
+let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
+def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [],
+ IIC_POP_MEM>, OpSize16;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", [],
IIC_POP_MEM>, OpSize32, Requires<[Not64BitMode]>;
-} // mayLoad, SchedRW
+} // mayStore, mayLoad, WriteRMW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
@@ -1194,9 +1214,10 @@ def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>;
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>;
+} // mayLoad, SchedRW
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", [],
IIC_POP_MEM>, OpSize32, Requires<[In64BitMode]>;
-} // mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
@@ -1965,7 +1986,12 @@ def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>,
Requires<[In64BitMode]>;
// Data16 instruction prefix
-def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>,
+ Requires<[Not16BitMode]>;
+
+// Data instruction prefix
+def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", []>,
+ Requires<[In16BitMode]>;
// Repeat string operation instruction prefixes
// These uses the DF flag in the EFLAGS register to inc or dec ECX
@@ -2079,6 +2105,7 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>,
Requires<[Not64BitMode]>;
+let mayStore = 1 in
def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[Not64BitMode]>;
@@ -2448,8 +2475,19 @@ def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
//===----------------------------------------------------------------------===//
// CLZERO Instruction
//
-let Uses = [EAX] in
-def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
+let SchedRW = [WriteSystem] in {
+ let Uses = [EAX] in
+ def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", [], IIC_SSE_CLZERO>,
+ TB, Requires<[HasCLZERO]>;
+
+ let usesCustomInserter = 1 in {
+ def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
+ [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>;
+ }
+} // SchedRW
+
+def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
@@ -2522,10 +2560,10 @@ let Predicates = [HasTBM] in {
// Memory Instructions
//
+let Predicates = [HasCLFLUSHOPT] in
def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD;
def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", []>, PD;
-def PCOMMIT : I<0xAE, MRM_F8, (outs), (ins), "pcommit", []>, PD;
//===----------------------------------------------------------------------===//
@@ -2977,7 +3015,7 @@ def : InstAlias<"mov\t{$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32me
def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
-def : InstAlias<"movq\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
+def : InstAlias<"mov{q}\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
// Match 'movq GR64, MMX' as an alias for movd.
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 0bb106823983..dc3800ce381b 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -294,6 +294,7 @@ def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
[(set VR64:$dst, (load_mmx addr:$src))],
IIC_MMX_MOVQ_RM>;
} // SchedRW
+
let SchedRW = [WriteStore] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -378,7 +379,6 @@ defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
MMX_PHADDSUBW>;
-
// -- Subtraction
defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
MMX_INTALU_ITINS>;
@@ -479,13 +479,6 @@ defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psrl_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
MMX_SHIFT_ITINS>;
@@ -496,13 +489,6 @@ defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psll_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
MMX_SHIFT_ITINS>;
@@ -510,11 +496,6 @@ defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psra_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRAWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psra_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRADrm VR64:$src1, addr:$src2)>;
-
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
MMX_INTALU_ITINS>;
@@ -576,9 +557,6 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
imm:$src2))],
IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>;
-
-
-
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
@@ -639,7 +617,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
[(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>;
-
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
@@ -670,6 +647,16 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (int_x86_sse2_cvtps2dq VR128:$src))))),
+ (MMX_CVTPS2PIirr VR128:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
+ (MMX_CVTTPS2PIirr VR128:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+ (MMX_CVTPD2PIirr VR128:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+ (MMX_CVTTPD2PIirr VR128:$src)>;
}
-
-
diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td
index 309f601d1fce..104ba2a174db 100644
--- a/lib/Target/X86/X86InstrMPX.td
+++ b/lib/Target/X86/X86InstrMPX.td
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
+let mayLoad = 1 in {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
@@ -21,16 +22,19 @@ multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
+}
defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
+let mayLoad = 1 in {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
+}
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
@@ -45,16 +49,18 @@ defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD;
def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
+let mayLoad = 1 in {
def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>;
-
+}
def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
+let mayStore = 1 in {
def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
@@ -65,6 +71,8 @@ def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
+}
+let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1812d01711d1..e1bf28cbf612 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -259,8 +259,8 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
- SDPatternOperator Int, RegisterClass RC,
- string asm, Operand memopr,
+ SDPatternOperator OpNode, RegisterClass RC,
+ ValueType VT, string asm, Operand memopr,
ComplexPattern mem_cpat, Domain d,
OpndItins itins, bit Is2Addr = 1> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
@@ -268,14 +268,14 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr, d>,
+ [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, mem_cpat:$src2))], itins.rm, d>,
+ [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -446,9 +446,9 @@ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoVLX_Or_NoDQI]>;
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoVLX_Or_NoDQI]>;
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
}
//===----------------------------------------------------------------------===//
@@ -461,12 +461,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [NoVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
-let Predicates = [NoVLX] in
+let Predicates = [NoAVX512] in
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
@@ -475,7 +475,7 @@ def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX, NoVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllZerosV))]>;
}
@@ -491,7 +491,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
[(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
@@ -527,12 +526,12 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
// AVX
defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
- VEX_4V, VEX_LIG;
+ VEX_4V, VEX_LIG, VEX_WIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
- VEX, VEX_LIG, Sched<[WriteStore]>;
+ VEX, VEX_LIG, Sched<[WriteStore]>, VEX_WIG;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
@@ -552,7 +551,7 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
+ IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>, VEX_WIG;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
@@ -644,10 +643,6 @@ let Predicates = [UseAVX] in {
(VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
@@ -738,10 +733,6 @@ let Predicates = [UseSSE2] in {
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold because
@@ -786,29 +777,29 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in
let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
- PS, VEX;
+ PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
"movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
- PD, VEX;
+ PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
- PS, VEX;
+ PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS>,
- PD, VEX;
+ PD, VEX, VEX_WIG;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
- PS, VEX, VEX_L;
+ PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
"movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
- PD, VEX, VEX_L;
+ PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
- PS, VEX, VEX_L;
+ PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS>,
- PD, VEX, VEX_L;
+ PD, VEX, VEX_L, VEX_WIG;
}
let Predicates = [UseSSE1] in {
@@ -832,35 +823,35 @@ let SchedRW = [WriteStore], Predicates = [HasAVX, NoVLX] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v2f64 VR128:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_WIG;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L, VEX_WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L, VEX_WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L, VEX_WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L, VEX_WIG;
} // SchedRW
// For disassembler
@@ -869,35 +860,35 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG;
def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG;
def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG;
def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG;
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
}
// Aliases to help the assembler pick two byte VEX encodings by swapping the
@@ -955,24 +946,10 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
IIC_SSE_MOVU_P_RR>;
}
-// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX, NoVLX] in {
- // 128-bit load/store
- def : Pat<(alignedloadv2i64 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (VMOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
-
- // 256-bit load/store
+ // 256-bit loads/stores need to use floating-point loads/stores in case we
+ // don't have AVX2. Execution domain fixing will convert to integer if AVX2
+ // is available and changing the domain is beneficial.
def : Pat<(alignedloadv4i64 addr:$src),
(VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
@@ -981,10 +958,18 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v4i64 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v8i32 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
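(Illustration only, assuming AVX without AVX2: a 256-bit integer copy in C is typically emitted through the floating-point vmovups/vmovaps forms that these patterns select; the execution-domain fixup may switch to vmovdqu/vmovdqa when AVX2 is available and that is beneficial.)

    #include <immintrin.h>

    /* 256-bit integer load + store; under -mavx (no AVX2) this typically
       compiles to vmovups ymm loads/stores, i.e. the FP move encodings. */
    void copy256(const __m256i *src, __m256i *dst) {
        __m256i v = _mm256_loadu_si256(src);
        _mm256_storeu_si256(dst, v);
    }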
// Special patterns for storing subvector extracts of lower 128-bits
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
@@ -994,18 +979,6 @@ let Predicates = [HasAVX, NoVLX] in {
def : Pat<(alignedstore (v4f32 (extract_subvector
(v8f32 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v2i64 (extract_subvector
- (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v4i32 (extract_subvector
- (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v8i16 (extract_subvector
- (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v16i8 (extract_subvector
- (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v2f64 (extract_subvector
(v4f64 VR256:$src), (iPTR 0))), addr:$dst),
@@ -1013,40 +986,6 @@ let Predicates = [HasAVX, NoVLX] in {
def : Pat<(store (v4f32 (extract_subvector
(v8f32 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v2i64 (extract_subvector
- (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v4i32 (extract_subvector
- (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v8i16 (extract_subvector
- (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v16i8 (extract_subvector
- (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
- // 128-bit load/store
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
-
- // 256-bit load/store
- def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v16i16 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v32i8 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
}
// Use movaps / movups for SSE integer load / store (one byte shorter).
@@ -1107,7 +1046,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
let Predicates = [UseAVX] in
defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- itin>, VEX_4V;
+ itin>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
@@ -1126,12 +1065,12 @@ def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOV_LH>, VEX;
+ IIC_SSE_MOV_LH>, VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOV_LH>, VEX;
+ IIC_SSE_MOV_LH>, VEX, VEX_WIG;
}// UseAVX
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
@@ -1238,12 +1177,12 @@ def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
- (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
- (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX, VEX_WIG;
} // UseAVX
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
@@ -1343,14 +1282,14 @@ let AddedComplexity = 20, Predicates = [UseAVX] in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V, Sched<[WriteFShuffle]>;
+ VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V, Sched<[WriteFShuffle]>;
+ VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1725,11 +1664,11 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, Requires<[HasAVX, NoVLX]>;
+ PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>;
+ PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
@@ -1777,20 +1716,21 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
- (ins FR64:$src1, FR64:$src2),
+ (ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2F]>;
+ Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
- (ins FR64:$src1, f64mem:$src2),
+ (ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
}
-def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+def : Pat<(f32 (fpround FR64:$src)),
+ (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1810,15 +1750,15 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1842,30 +1782,30 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
// SSE2 instructions with XS prefix
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
- (ins FR32:$src1, FR32:$src2),
+ (ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2F]>;
+ Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
- (ins FR32:$src1, f32mem:$src2),
+ (ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
}
def : Pat<(f64 (fpextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
@@ -1895,15 +1835,15 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1999,22 +1939,22 @@ def : Pat<(v4f32 (X86Movss
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
@@ -2035,7 +1975,7 @@ def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtF2I]>;
+ VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -2044,7 +1984,7 @@ def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
- Sched<[WriteCvtF2ILd]>;
+ Sched<[WriteCvtF2ILd]>, VEX_WIG;
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTPD2DQrm VR128:$dst, f128mem:$src), 0>;
@@ -2053,12 +1993,12 @@ def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
@@ -2083,23 +2023,23 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (v4f32 VR128:$src))))],
- IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (loadv4f32 addr:$src))))],
- IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (fp_to_sint (v8f32 VR256:$src))))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (fp_to_sint (loadv8f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
- Sched<[WriteCvtF2ILd]>;
+ Sched<[WriteCvtF2ILd]>, VEX_WIG;
}
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2118,7 +2058,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v2f64 VR128:$src))))],
- IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -2132,7 +2072,7 @@ def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (loadv2f64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQrm VR128:$dst, f128mem:$src), 0>;
@@ -2142,12 +2082,12 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (v4f64 VR256:$src))))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (loadv4f64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG;
}
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
@@ -2193,19 +2133,19 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
@@ -2225,30 +2165,30 @@ let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
- VEX, Sched<[WriteCvtI2FLd]>;
+ (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))]>,
+ VEX, Sched<[WriteCvtI2FLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtI2F]>;
+ VEX, Sched<[WriteCvtI2F]>, VEX_WIG;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))))]>,
- VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
+ VEX, VEX_L, Sched<[WriteCvtI2FLd]>, VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (sint_to_fp (v4i32 VR128:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtI2F]>;
+ VEX, VEX_L, Sched<[WriteCvtI2F]>, VEX_WIG;
}
let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))],
+ (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -2276,7 +2216,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))],
- IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2285,7 +2225,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrm VR128:$dst, f128mem:$src), 0>;
@@ -2294,11 +2234,11 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround (loadv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
}
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
@@ -2368,21 +2308,25 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
}
}
+let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG;
+ SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSE_ALU_F32S, i8immZExt5>, // same latency as 32 bit compare
- XD, VEX_4V, VEX_LIG;
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
+ let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S,
i8immZExt3>, XS;
+ let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
@@ -2398,6 +2342,7 @@ multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
VR128:$src, immLeaf:$cc))],
itins.rr>,
Sched<[itins.Sched]>;
+let mayLoad = 1 in
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
@@ -2408,18 +2353,22 @@ multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
let isCodeGenOnly = 1 in {
// Aliases to match intrinsics which expect XMM operand(s).
+ let ExeDomain = SSEPackedSingle in
defm Int_VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
SSE_ALU_F32S, i8immZExt5, sse_load_f32>,
XS, VEX_4V;
+ let ExeDomain = SSEPackedDouble in
defm Int_VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
SSE_ALU_F32S, i8immZExt5, sse_load_f64>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
+ let ExeDomain = SSEPackedSingle in
defm Int_CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
SSE_ALU_F32S, i8immZExt3, sse_load_f32>, XS;
+ let ExeDomain = SSEPackedDouble in
defm Int_CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
SSE_ALU_F64S, i8immZExt3, sse_load_f64>,
@@ -2437,6 +2386,7 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
IIC_SSE_COMIS_RR>,
Sched<[WriteFAdd]>;
+let mayLoad = 1 in
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
@@ -2454,6 +2404,7 @@ multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
IIC_SSE_COMIS_RR>,
Sched<[WriteFAdd]>;
+let mayLoad = 1 in
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
@@ -2464,26 +2415,26 @@ multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss">, PS, VEX, VEX_LIG;
+ "ucomiss">, PS, VEX, VEX_LIG, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd">, PD, VEX, VEX_LIG;
+ "ucomisd">, PD, VEX, VEX_LIG, VEX_WIG;
let Pattern = []<dag> in {
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
- "comiss">, PS, VEX, VEX_LIG;
+ "comiss">, PS, VEX, VEX_LIG, VEX_WIG;
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
- "comisd">, PD, VEX, VEX_LIG;
+ "comisd">, PD, VEX, VEX_LIG, VEX_WIG;
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss">, PS, VEX;
+ sse_load_f32, "ucomiss">, PS, VEX, VEX_WIG;
defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd">, PD, VEX;
+ sse_load_f64, "ucomisd">, PD, VEX, VEX_WIG;
defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss">, PS, VEX;
+ sse_load_f32, "comiss">, PS, VEX, VEX_WIG;
defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd">, PD, VEX;
+ sse_load_f64, "comisd">, PD, VEX, VEX_WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS;
@@ -2512,18 +2463,19 @@ let Defs = [EFLAGS] in {
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- Operand CC, Intrinsic Int, string asm,
+ Operand CC, ValueType VT, string asm,
string asm_alt, Domain d, ImmLeaf immLeaf,
PatFrag ld_frag, OpndItins itins = SSE_ALU_F32P> {
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))],
+ [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, immLeaf:$cc)))],
itins.rr, d>,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2), immLeaf:$cc))],
+ [(set RC:$dst,
+ (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), immLeaf:$cc)))],
itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
@@ -2540,67 +2492,33 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
}
}
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle, i8immZExt5, loadv4f32>, PS, VEX_4V;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
+ SSEPackedSingle, i8immZExt5, loadv4f32>, PS, VEX_4V, VEX_WIG;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble, i8immZExt5, loadv2f64>, PD, VEX_4V;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
+ SSEPackedDouble, i8immZExt5, loadv2f64>, PD, VEX_4V, VEX_WIG;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle, i8immZExt5, loadv8f32>, PS, VEX_4V, VEX_L;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble, i8immZExt5, loadv4f64>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
- defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedSingle, i8immZExt5, memopv4f32, SSE_ALU_F32P>, PS;
- defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedDouble, i8immZExt5, memopv2f64, SSE_ALU_F64P>, PD;
}
-let Predicates = [HasAVX] in {
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), (loadv4f32 addr:$src2), imm:$cc)),
- (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), (loadv2f64 addr:$src2), imm:$cc)),
- (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-
-def : Pat<(v8f32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
- (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
-def : Pat<(v8f32 (X86cmpp (v8f32 VR256:$src1), (loadv8f32 addr:$src2), imm:$cc)),
- (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v4f64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
- (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
-def : Pat<(v4f64 (X86cmpp (v4f64 VR256:$src1), (loadv4f64 addr:$src2), imm:$cc)),
- (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
-}
-
-let Predicates = [UseSSE1] in {
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), (memopv4f32 addr:$src2), imm:$cc)),
- (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-}
-
-let Predicates = [UseSSE2] in {
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), (memopv2f64 addr:$src2), imm:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-}
-
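(Usage sketch, illustrative only and assuming AVX intrinsics are available: the packed compares selected by these X86cmpp patterns take the condition as an immediate and produce a per-lane all-ones/all-zeros mask.)

    #include <immintrin.h>

    /* cmpps with an immediate predicate; under AVX this is vcmpltps. */
    __m128 less_mask(__m128 a, __m128 b) {
        return _mm_cmp_ps(a, b, _CMP_LT_OQ);
    }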
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//
@@ -2624,16 +2542,16 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
let Predicates = [HasAVX, NoVLX] in {
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv4f32, SSEPackedSingle>, PS, VEX_4V;
+ loadv4f32, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv2f64, SSEPackedDouble>, PD, VEX_4V;
+ loadv2f64, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2715,29 +2633,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V;
+ SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V;
+ SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V;
+ SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V;
+ SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2789,13 +2707,13 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
let Predicates = [HasAVX] in {
defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
- SSEPackedSingle>, PS, VEX;
+ SSEPackedSingle>, PS, VEX, VEX_WIG;
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
- SSEPackedDouble>, PD, VEX;
+ SSEPackedDouble>, PD, VEX, VEX_WIG;
defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
- SSEPackedSingle>, PS, VEX, VEX_L;
+ SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, VEX_L;
+ SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
}
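(Illustration only, assuming SSE intrinsics: movmskps packs the per-lane sign bits into a small integer mask, which is what these sign-mask extraction defs implement.)

    #include <immintrin.h>

    /* Returns nonzero if any lane of v has its sign bit set. */
    int any_negative(__m128 v) {
        return _mm_movemask_ps(v) != 0;
    }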
defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
@@ -2839,7 +2757,7 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
OpndItins itins, bit IsCommutable = 0, Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
@@ -2848,7 +2766,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, itins,
- IsCommutable, 0>, VEX_4V, VEX_L;
+ IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}
// These are ordered here for pattern ordering requirements with the fp versions
@@ -2876,7 +2794,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
(bc_v4i64 (v8f32 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L;
+ (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
@@ -2884,14 +2802,14 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
(bc_v4i64 (v4f64 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
(loadv4i64 addr:$src2)))], 0>,
- PD, VEX_4V, VEX_L;
+ PD, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
(bc_v2i64 (v4f32 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V;
+ (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2899,7 +2817,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
(bc_v2i64 (v2f64 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(loadv2i64 addr:$src2)))], 0>,
- PD, VEX_4V;
+ PD, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3065,17 +2983,17 @@ multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
- SSEPackedSingle, itins.s, 0>, PS, VEX_4V;
+ SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64,
- SSEPackedDouble, itins.d, 0>, PD, VEX_4V;
+ SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_WIG;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, VR256, v8f32, f256mem, loadv8f32,
- SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
- SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3092,10 +3010,10 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle, itins.s, 0>,
- XS, VEX_4V, VEX_LIG;
+ XS, VEX_4V, VEX_LIG, VEX_WIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble, itins.d, 0>,
- XD, VEX_4V, VEX_LIG;
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -3108,21 +3026,20 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
- SDPatternOperator IntSS,
- SDPatternOperator IntSD,
+ SDPatternOperator OpNode,
SizeItins itins> {
- defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG;
- defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+ SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG;
+ SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
- defm SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, itins.s>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, itins.d>, XD;
}
@@ -3131,29 +3048,23 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x58, "add", null_frag, null_frag,
- SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, null_frag,
- SSE_MUL_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, null_frag,
- SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SSE_ALU_ITINS_S>;
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, null_frag,
- SSE_DIV_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SSE_DIV_ITINS_S>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5F, "max", int_x86_sse_max_ss,
- int_x86_sse2_max_sd, SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SSE_ALU_ITINS_S>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5D, "min", int_x86_sse_min_ss,
- int_x86_sse2_min_sd, SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SSE_ALU_ITINS_S>;
}
let isCodeGenOnly = 1 in {
@@ -3400,7 +3311,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
Sched<[itins.Sched.Folded, ReadAfterLd]>,
Requires<[target, OptForSize]>;
- let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
+ let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
@@ -3444,7 +3355,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
- let isCodeGenOnly = 1 in {
+ let isCodeGenOnly = 1, ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3465,7 +3376,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
// which has a clobber before the rcp, vs.
// vrcpss mem, %xmm0, %xmm0
// TODO: In theory, we could fold the load, and avoid the stall caused by
- // the partial register store, either in ExeDepFix or with smarter RA.
+ // the partial register store, either in ExecutionDepsFix or with smarter RA.
let Predicates = [UseAVX] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
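(Aside, not part of the patch, assuming SSE intrinsics: a scalar reciprocal approximation in C. The upper lanes of the rcpss/vrcpss result come from the first source operand, so the pattern above feeds an IMPLICIT_DEF there rather than tying the output to a stale register and creating the false dependency the comment describes.)

    #include <immintrin.h>

    /* Approximate 1/x via rcpss; relative error is roughly 2^-12. */
    float fast_recip(float x) {
        return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(x)));
    }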
@@ -3495,22 +3406,22 @@ let Predicates = prds in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
- itins.rr>, VEX, Sched<[itins.Sched]>;
+ itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
- itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>, VEX_WIG;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>, VEX_WIG;
}
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3531,22 +3442,22 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
- itins.rr>, VEX, Sched<[itins.Sched]>;
+ itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
- itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>, VEX_WIG;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
- itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>, VEX_WIG;
}
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3567,7 +3478,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
+ SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3579,7 +3490,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, "SD">,
- XD, VEX_4V, VEX_LIG;
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
}
// Square root.
@@ -3647,41 +3558,41 @@ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_WIG;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_WIG;
let ExeDomain = SSEPackedInt in
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
+ (ins i128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_WIG;
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8f32 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
+ IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f64 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
+ IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
+ (ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
+ IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
}
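(Illustration only, assuming SSE intrinsics and a 16-byte-aligned destination: non-temporal stores such as movntps/vmovntps write around the cache, which is what the alignednontemporalstore patterns above match.)

    #include <immintrin.h>

    /* Streaming store of one 128-bit vector; dst must be 16-byte aligned. */
    void stream_store(float *dst, __m128 v) {
        _mm_stream_ps(dst, v);
    }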
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
@@ -3797,20 +3708,18 @@ def : Pat<(X86MFence), (MFENCE)>;
//===----------------------------------------------------------------------===//
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>, VEX_WIG;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>, VEX_WIG;
-let Predicates = [UseSSE1] in {
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, TB, Sched<[WriteLoad]>;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, TB, Sched<[WriteLoad]>;
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, TB, Sched<[WriteStore]>;
-}
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, TB, Sched<[WriteStore]>;
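(Aside, not part of the patch, assuming SSE intrinsics: stmxcsr/ldmxcsr are reached from C through the MXCSR intrinsics, for example to enable flush-to-zero.)

    #include <immintrin.h>

    /* Read MXCSR (stmxcsr), set the FTZ bit (bit 15), write it back (ldmxcsr). */
    void enable_ftz(void) {
        _mm_setcsr(_mm_getcsr() | 0x8000);
    }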
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3821,16 +3730,16 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
let hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
}
// For Disassembler
@@ -3839,54 +3748,58 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVU_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
hasSideEffects = 0, SchedRW = [WriteLoad] in {
+let Predicates = [HasAVX,NoVLX] in
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
- VEX;
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (alignedloadv2i64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX, VEX_WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
- VEX, VEX_L;
-let Predicates = [HasAVX] in {
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
- XS, VEX;
- def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
- XS, VEX, VEX_L;
-}
+ VEX, VEX_L, VEX_WIG;
+let Predicates = [HasAVX,NoVLX] in
+def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv2i64 addr:$src))],
+ IIC_SSE_MOVU_P_RM>, XS, VEX, VEX_WIG;
+def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX, VEX_L, VEX_WIG;
}
let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
+let Predicates = [HasAVX,NoVLX] in
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
- VEX;
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2i64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
- VEX, VEX_L;
-let Predicates = [HasAVX] in {
+ VEX, VEX_L, VEX_WIG;
+let Predicates = [HasAVX,NoVLX] in
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
- XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(store (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>,
+ XS, VEX, VEX_WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
- XS, VEX, VEX_L;
-}
+ XS, VEX, VEX_L, VEX_WIG;
}
let SchedRW = [WriteMove] in {
@@ -3949,6 +3862,50 @@ def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
(VMOVDQUYrr_REV VR256L:$dst, VR256H:$src), 0>;
+let Predicates = [HasAVX, NoVLX] in {
+ // Additional patterns for other integer sizes.
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVDQAmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVDQAmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVDQAmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
+
+  // Special patterns for storing subvector extracts of the lower 128 bits.
+  // It's cheaper to just use VMOVDQA/VMOVDQU instead of VEXTRACTF128mr.
+ def : Pat<(alignedstore (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+
+ def : Pat<(store (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+}
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
@@ -4037,12 +3994,12 @@ defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V;
+ loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
VR256, loadv4i64, i256mem, SSE_PMADD,
- 0>, VEX_4V, VEX_L;
+ 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
memopv2i64, i128mem, SSE_PMADD>;
@@ -4050,11 +4007,11 @@ defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 0>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
memopv2i64, i128mem, SSE_INTALU_ITINS_P>;
@@ -4062,11 +4019,11 @@ defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
let Predicates = [HasAVX, NoVLX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, loadv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 0>, VEX_4V, VEX_L;
+ SSE_INTMUL_ITINS_P, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P>;
@@ -4113,11 +4070,11 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR128, DstVT128, SrcVT,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR256, DstVT256, SrcVT,
- loadv2i64, 0>, VEX_4V, VEX_L;
+ loadv2i64, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
VR128, DstVT128, SrcVT, memopv2i64>;
@@ -4138,10 +4095,10 @@ multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
- VR128, v16i8, 0>, VEX_4V;
+ VR128, v16i8, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
- VR256, v32i8, 0>, VEX_4V, VEX_L;
+ VR256, v32i8, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8>;
}
@@ -4202,7 +4159,7 @@ let Predicates = [HasAVX, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>, VEX_WIG;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
@@ -4210,7 +4167,7 @@ let Predicates = [HasAVX, prd] in {
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
(i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
- Sched<[WriteShuffleLd]>;
+ Sched<[WriteShuffleLd]>, VEX_WIG;
}
let Predicates = [HasAVX2, prd] in {
@@ -4220,7 +4177,7 @@ let Predicates = [HasAVX2, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>, VEX_WIG;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
@@ -4228,7 +4185,7 @@ let Predicates = [HasAVX2, prd] in {
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
(i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
- Sched<[WriteShuffleLd]>;
+ Sched<[WriteShuffleLd]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
@@ -4257,20 +4214,6 @@ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
NoVLX_Or_NoBWI>, XD;
-let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
-}
-
-let Predicates = [UseSSE2] in {
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
-}
-
//===---------------------------------------------------------------------===//
// Packed Integer Pack Instructions (SSE & AVX)
//===---------------------------------------------------------------------===//
@@ -4364,24 +4307,24 @@ multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
loadv2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus>,
VEX_4V, VEX_L;
}
@@ -4443,44 +4386,44 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -4565,14 +4508,14 @@ def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))],
- IIC_SSE_MOVMSK>, VEX;
+ IIC_SSE_MOVMSK>, VEX, VEX_WIG;
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
@@ -4593,13 +4536,13 @@ def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
- IIC_SSE_MASKMOV>, VEX;
+ IIC_SSE_MASKMOV>, VEX, VEX_WIG;
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
- IIC_SSE_MASKMOV>, VEX;
+ IIC_SSE_MASKMOV>, VEX, VEX_WIG;
let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -4725,19 +4668,6 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
-
-def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
-
-def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
-
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
@@ -4758,12 +4688,12 @@ def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
} //SchedRW
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs),
+def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
(ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
@@ -4837,6 +4767,8 @@ let Predicates = [UseAVX] in {
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
@@ -4866,6 +4798,8 @@ let Predicates = [UseSSE2] in {
(MOV64toPQIrr GR64:$src)>;
}
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
@@ -4903,7 +4837,7 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- VEX, Requires<[UseAVX]>;
+ VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4920,7 +4854,7 @@ def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, VEX_WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (extractelt (v2i64 VR128:$src),
@@ -4932,7 +4866,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
SchedRW = [WriteVecLogic] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX;
+ "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX, VEX_WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>;
}
@@ -4978,7 +4912,7 @@ def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
- XS, VEX, Requires<[UseAVX]>;
+ XS, VEX, Requires<[UseAVX]>, VEX_WIG;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -5016,13 +4950,13 @@ def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
let Predicates = [HasAVX, NoVLX] in {
defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v4f32, VR128, loadv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v4f32, VR128, loadv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG;
defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG;
defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
memopv4f32, f128mem>;
@@ -5090,8 +5024,8 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
}
let Predicates = [HasAVX, NoVLX] in {
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
- defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX, VEX_WIG;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L, VEX_WIG;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
@@ -5108,16 +5042,6 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDDUPYrr VR256:$src)>;
}
-let Predicates = [HasAVX] in {
- def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-}
-
let Predicates = [HasAVX, NoVLX] in
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
(VMOVDDUPrm addr:$src)>;
@@ -5128,13 +5052,6 @@ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
let Predicates = [UseSSE3] in {
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (MOVDDUPrm addr:$src)>;
}
//===---------------------------------------------------------------------===//
@@ -5145,11 +5062,11 @@ let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX, VEX_WIG;
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
@@ -5183,15 +5100,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V;
+ f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V, VEX_WIG;
defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L;
+ f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V;
+ f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V, VEX_WIG;
defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L;
+ f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
}
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
@@ -5278,23 +5195,23 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, loadv4f32, 0>, VEX_4V;
+ X86fhadd, loadv4f32, 0>, VEX_4V, VEX_WIG;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, loadv4f32, 0>, VEX_4V;
+ X86fhsub, loadv4f32, 0>, VEX_4V, VEX_WIG;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L;
+ X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L;
+ X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, loadv2f64, 0>, VEX_4V;
+ X86fhadd, loadv2f64, 0>, VEX_4V, VEX_WIG;
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, loadv2f64, 0>, VEX_4V;
+ X86fhsub, loadv2f64, 0>, VEX_4V, VEX_WIG;
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L;
+ X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L;
+ X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -5352,84 +5269,24 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
Sched<[WriteVecALULd]>;
}
-// Helper fragments to match sext vXi1 to vXiY.
-def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128:$src))>;
-def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
-def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
-def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256:$src))>;
-def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
-def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
-
-let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX;
- defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX;
-}
-let Predicates = [HasAVX, NoVLX] in {
- defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX;
-}
-
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (VPABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (VPABSWrr VR128:$src)>;
+ defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (VPABSDrr VR128:$src)>;
-}
-
-let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L;
- defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L;
-}
-let Predicates = [HasAVX2, NoVLX] in {
- defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L;
+ defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG;
}
-
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v4i64 (v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
- (VPABSBYrr VR256:$src)>;
- def : Pat<(xor
- (bc_v4i64 (v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
- (VPABSWYrr VR256:$src)>;
+ defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(xor
- (bc_v4i64 (v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
- (VPABSDYrr VR256:$src)>;
+ defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG;
}
-defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>;
-defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>;
-defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>;
-
-let Predicates = [UseSSSE3] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (PABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (PABSWrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (PABSDrr VR128:$src)>;
-}
+defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>;
+defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>;
+defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -5527,45 +5384,45 @@ let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
VR128, loadv2i64, i128mem,
- SSE_PSHUFB, 0>, VEX_4V;
+ SSE_PSHUFB, 0>, VEX_4V, VEX_WIG;
defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
v16i8, VR128, loadv2i64, i128mem,
- SSE_PMADD, 0>, VEX_4V;
+ SSE_PMADD, 0>, VEX_4V, VEX_WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
VR128, loadv2i64, i128mem,
- SSE_PMULHRSW, 0>, VEX_4V;
+ SSE_PMULHRSW, 0>, VEX_4V, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
loadv2i64, i128mem,
- SSE_PHADDSUBD, 0>, VEX_4V;
+ SSE_PHADDSUBD, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
- SSE_PSIGN, loadv2i64, 0>, VEX_4V;
+ SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128,
- SSE_PSIGN, loadv2i64, 0>, VEX_4V;
+ SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
- SSE_PSIGN, loadv2i64, 0>, VEX_4V;
+ SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
+ SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
+ SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
}
@@ -5573,42 +5430,42 @@ let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
VR256, loadv4i64, i256mem,
- SSE_PSHUFB, 0>, VEX_4V, VEX_L;
+ SSE_PSHUFB, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
v32i8, VR256, loadv4i64, i256mem,
- SSE_PMADD, 0>, VEX_4V, VEX_L;
+ SSE_PMADD, 0>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- SSE_PMULHRSW, 0>, VEX_4V, VEX_L;
+ SSE_PMULHRSW, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
loadv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNBY : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNWY : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNDY : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -5686,9 +5543,9 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX] in
- defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V;
+ defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
- defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
+ defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGNR : ssse3_palignr<"palignr">;
@@ -5779,10 +5636,10 @@ multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
let Predicates = [HasAVX, prd] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
- VR128, VR128, AVXItins>, VEX;
+ VR128, VR128, AVXItins>, VEX, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
- VR256, VR128, AVX2Itins>, VEX, VEX_L;
+ VR256, VR128, AVX2Itins>, VEX, VEX_L, VEX_WIG;
}
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
@@ -6010,12 +5867,12 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
}
}
-defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
-defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
+defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec, extloadi32i16>;
+defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec, loadi16_anyext>;
let Predicates = [UseSSE41] in {
- defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
+ defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec, loadi16_anyext>;
}
//===----------------------------------------------------------------------===//
@@ -6103,20 +5960,20 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR64:$dst,
(extractelt (v2i64 VR128:$src1), imm:$src2))]>,
- Sched<[WriteShuffle]>, REX_W;
+ Sched<[WriteShuffle]>;
let SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i64mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v2i64 VR128:$src1), imm:$src2),
- addr:$dst)]>, REX_W;
+ addr:$dst)]>;
}
let Predicates = [HasAVX, NoDQI] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
-defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
@@ -6140,7 +5997,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
let Predicates = [UseAVX] in
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
}
@@ -6268,7 +6125,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
let ExeDomain = SSEPackedSingle in {
let Predicates = [UseAVX] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}
@@ -6461,14 +6318,14 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_unop_p<0x08, 0x09, "vround", f128mem, VR128,
loadv4f32, loadv2f64,
int_x86_sse41_round_ps,
- int_x86_sse41_round_pd>, VEX;
+ int_x86_sse41_round_pd>, VEX, VEX_WIG;
defm VROUNDY : sse41_fp_unop_p<0x08, 0x09, "vround", f256mem, VR256,
loadv8f32, loadv4f64,
int_x86_avx_round_ps_256,
- int_x86_avx_round_pd_256>, VEX, VEX_L;
+ int_x86_avx_round_pd_256>, VEX, VEX_L, VEX_WIG;
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
- int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+ int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG, VEX_WIG;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
@@ -6606,20 +6463,20 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
- Sched<[WriteVecLogic]>, VEX;
+ Sched<[WriteVecLogic]>, VEX, VEX_WIG;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
- Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
+ Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
- Sched<[WriteVecLogic]>, VEX, VEX_L;
+ Sched<[WriteVecLogic]>, VEX, VEX_L, VEX_WIG;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
- Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L, VEX_WIG;
}
let Defs = [EFLAGS] in {
@@ -6722,7 +6579,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
int_x86_sse41_phminposuw, loadv2i64,
- WriteVecIMul>, VEX;
+ WriteVecIMul>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
int_x86_sse41_phminposuw, memopv2i64,
WriteVecIMul>;
@@ -6778,65 +6635,65 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX, NoVLX] in {
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
VR128, loadv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
VR256, loadv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -6864,18 +6721,18 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX, NoVLX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2] in {
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -6945,52 +6802,52 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
+ DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_WIG;
}
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32,
VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_WIG;
defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32,
VR256, loadv8f32, f256mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_WIG;
defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
VR256, loadv4f64, f256mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, loadv4f32, f128mem, 0,
- SSE_DPPS_ITINS>, VEX_4V;
+ SSE_DPPS_ITINS>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, loadv2f64, f128mem, 0,
- SSE_DPPS_ITINS>, VEX_4V;
+ SSE_DPPS_ITINS>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, loadv8f32, i256mem, 0,
- SSE_DPPS_ITINS>, VEX_4V, VEX_L;
+ SSE_DPPS_ITINS>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -7020,6 +6877,19 @@ let Constraints = "$src1 = $dst" in {
SSE_DPPD_ITINS>;
}
+// For insertion into the zero index (low half) of a 256-bit vector, it is
+// more efficient to generate a blend with an immediate instead of an insert*128.
+let Predicates = [HasAVX] in {
+def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
+ (VBLENDPDYrri VR256:$src1,
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0x3)>;
+def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+}
+
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
@@ -7165,14 +7035,14 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}"),
+ "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
itins.rr>, Sched<[itins.Sched]>;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}"),
+ "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (mem_frag addr:$src2)), XMM0))],
@@ -7193,18 +7063,18 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
DEFAULT_ITINS_VARBLENDSCHED>;
// Aliases with the implicit xmm0 argument
-def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
-def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
-def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
-def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
-def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
-def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;
+def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
+def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
+def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
+ (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
+ (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
let Predicates = [UseSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
@@ -7228,17 +7098,14 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
- VEX;
+ "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
+ VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
- VEX, VEX_L;
+ "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
+ VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movntdqa\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+ "movntdqa\t{$src, $dst|$dst, $src}", []>;
} // SchedRW
let Predicates = [HasAVX2, NoVLX] in {
@@ -7295,11 +7162,11 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- loadv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -7323,7 +7190,7 @@ multiclass pseudo_pcmpistrm<string asm, PatFrag ld_frag> {
let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128", loadv2i64>,
- Requires<[HasAVX]>;
+ Requires<[HasAVX]>, VEX_WIG;
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128", memopv2i64>,
Requires<[UseSSE42]>;
}
@@ -7397,7 +7264,7 @@ multiclass pseudo_pcmpistri<string asm, PatFrag ld_frag> {
let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI", loadv2i64>,
- Requires<[HasAVX]>;
+ Requires<[HasAVX]>, VEX_WIG;
defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI", memopv2i64>,
Requires<[UseSSE42]>;
}
@@ -7515,14 +7382,18 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
bit UsesXMM0 = 0> {
def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(UsesXMM0,
+ !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;
def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(UsesXMM0,
+ !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
@@ -7557,10 +7428,10 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
}
// Aliases with explicit %xmm0
-def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
-def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+ (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+ (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
//===----------------------------------------------------------------------===//
// AES-NI Instructions
@@ -7588,13 +7459,13 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -7615,12 +7486,12 @@ let Predicates = [HasAVX, HasAES] in {
"vaesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst,
(int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
- VEX;
+ VEX, VEX_WIG;
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
- Sched<[WriteAESIMCLd]>, VEX;
+ Sched<[WriteAESIMCLd]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
@@ -7640,13 +7511,13 @@ let Predicates = [HasAVX, HasAES] in {
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
- Sched<[WriteAESKeyGen]>, VEX;
+ Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
- Sched<[WriteAESKeyGenLd]>, VEX;
+ Sched<[WriteAESKeyGenLd]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
@@ -7672,14 +7543,14 @@ def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
- Sched<[WriteCLMul]>;
+ Sched<[WriteCLMul]>, VEX_WIG;
def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
(loadv2i64 addr:$src2), imm:$src3))]>,
- Sched<[WriteCLMulLd, ReadAfterLd]>;
+ Sched<[WriteCLMulLd, ReadAfterLd]>, VEX_WIG;
// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
@@ -7879,6 +7750,15 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
+
+// Without AVX2 we need to concat two v4i32 V_SETALLONES to create a 256-bit
+// all ones value.
+let Predicates = [HasAVX1Only] in
+def : Pat<(v8i32 immAllOnesV),
+ (VINSERTF128rr
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), (V_SETALLONES), sub_xmm),
+ (V_SETALLONES), 1)>;
+
multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
PatFrag memop_frag> {
def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
@@ -8029,41 +7909,6 @@ let ExeDomain = SSEPackedDouble in {
loadv4i64, v4f64, v4i64>, VEX_L;
}
-let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
- (VPERMILPSYrr VR256:$src1, VR256:$src2)>;
-def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
- (VPERMILPSYrm VR256:$src1, addr:$src2)>;
-def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))),
- (VPERMILPDYrr VR256:$src1, VR256:$src2)>;
-def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))),
- (VPERMILPDYrm VR256:$src1, addr:$src2)>;
-
-def : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
- (VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
- (VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)),
- (i8 imm:$imm))),
- (VPERMILPSYmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))),
- (VPERMILPDYmi addr:$src1, imm:$imm)>;
-
-def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))),
- (VPERMILPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
- (VPERMILPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))),
- (VPERMILPDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))),
- (VPERMILPDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))),
- (VPERMILPDri VR128:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
- (VPERMILPDmi addr:$src1, imm:$imm)>;
-}
-
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
@@ -8118,15 +7963,16 @@ def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
+// Note: these instructions do not affect YMM16-YMM31.
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
// Zero All YMM registers
def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
- [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>;
+ [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>, VEX_WIG;
// Zero Upper bits of YMM registers
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
- [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>;
+ [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>, VEX_WIG;
}
//===----------------------------------------------------------------------===//
@@ -8235,6 +8081,46 @@ defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
VR256, loadv4i64, i256mem>, VEX_L;
+// For insertion into the zero index (low half) of a 256-bit vector, it is
+// more efficient to generate a blend with immediate instead of an insert*128.
+let Predicates = [HasAVX2] in {
+def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+}
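A rough intrinsics-level equivalent of the VPBLENDDYrri form above, assuming AVX2 (the helper name insert_low128 is hypothetical): immediate 0x0F takes dwords 0-3 from the widened 128-bit operand and dwords 4-7 from the original 256-bit value.

#include <immintrin.h>

// Insert a 128-bit vector into the low half of a 256-bit vector with a
// dword blend (vpblendd) instead of vinserti128.
static __m256i insert_low128(__m256i v, __m128i lo) {
  __m256i widened = _mm256_castsi128_si256(lo);  // upper half undefined
  return _mm256_blend_epi32(v, widened, 0x0F);   // low 4 dwords from 'widened'
}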
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+}
+
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
@@ -8282,6 +8168,11 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQYrm addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
@@ -8296,7 +8187,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
(VPBROADCASTWYrm addr:$src)>;
}
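A sketch of the ZEXT_LOAD idea above, assuming a 32-bit target with AVX2 (broadcast_q_from_mem is a hypothetical helper): the 64-bit scalar never passes through a GPR; it is loaded with a zero-extending vector load and then broadcast.

#include <immintrin.h>
#include <cstdint>

// On 32-bit targets an i64 cannot be loaded into a GPR, so feed the
// broadcast from a zero-extending low-quadword vector load (vmovq).
static __m256i broadcast_q_from_mem(const uint64_t *p) {
  __m128i q = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(p));
  return _mm256_broadcastq_epi64(q);  // vpbroadcastq ymm, xmm
}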
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
@@ -8343,18 +8234,13 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
}
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ (VPBROADCASTDrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
- def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
-
- // The patterns for VPBROADCASTD are not needed because they would match
- // the exact same thing as VBROADCASTSS patterns.
-
+ (VPBROADCASTDYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
- (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
- // The v4i64 pattern is not needed because VBROADCASTSDYrr already match.
+ (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
+ (VPBROADCASTQYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
}
// AVX1 broadcast patterns
@@ -8377,15 +8263,15 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [HasAVX1Only] in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
- (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_xmm),
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
@@ -8399,7 +8285,7 @@ let Predicates = [HasAVX1Only] in {
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
def : Pat<(v2i64 (X86VBroadcast i64:$src)),
- (VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44)>;
}
//===----------------------------------------------------------------------===//
@@ -8407,7 +8293,8 @@ let Predicates = [HasAVX1Only] in {
//
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT, X86FoldableSchedWrite Sched> {
+ ValueType OpVT, X86FoldableSchedWrite Sched,
+ X86MemOperand memOp> {
let Predicates = [HasAVX2, NoVLX] in {
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -8417,7 +8304,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
Sched<[Sched]>, VEX_4V, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i256mem:$src2),
+ (ins VR256:$src1, memOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
@@ -8427,12 +8314,15 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>;
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256,
+ i256mem>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256,
+ f256mem>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT, X86FoldableSchedWrite Sched> {
+ ValueType OpVT, X86FoldableSchedWrite Sched,
+ X86MemOperand memOp> {
let Predicates = [HasAVX2, NoVLX] in {
def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, u8imm:$src2),
@@ -8442,7 +8332,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
- (ins i256mem:$src1, u8imm:$src2),
+ (ins memOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
@@ -8453,10 +8343,10 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
- WriteShuffle256>, VEX_W;
+ WriteShuffle256, i256mem>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
- WriteFShuffle256>, VEX_W;
+ WriteFShuffle256, f256mem>, VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index e2be73532157..0efb383e1c8d 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -340,75 +340,71 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
let hasSideEffects = 0 in {
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+
+let Uses = [CL, EFLAGS] in {
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [CL, EFLAGS]
+
+let Uses = [EFLAGS] in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
-let Uses = [CL] in
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
-
def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t$dst", [], IIC_SR>, OpSize32;
def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
-let Uses = [CL] in
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
-
-
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
"rcl{q}\t$dst", [], IIC_SR>;
def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
- "rcl{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [EFLAGS]
+let Uses = [CL, EFLAGS] in {
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [CL, EFLAGS]
+let Uses = [EFLAGS] in {
def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t$dst", [], IIC_SR>;
def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
-let Uses = [CL] in
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
-
def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t$dst", [], IIC_SR>, OpSize32;
def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
-let Uses = [CL] in
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
-
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
"rcr{q}\t$dst", [], IIC_SR>;
def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
- "rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [EFLAGS]
} // Constraints = "$src = $dst"
-let SchedRW = [WriteShiftLd, WriteRMW] in {
+let SchedRW = [WriteShiftLd, WriteRMW], mayStore = 1 in {
+let Uses = [EFLAGS] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt),
@@ -442,8 +438,9 @@ def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t$dst", [], IIC_SR>;
def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+} // Uses = [EFLAGS]
-let Uses = [CL] in {
+let Uses = [CL, EFLAGS] in {
def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
@@ -461,7 +458,7 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
"rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-}
+} // Uses = [CL, EFLAGS]
} // SchedRW
} // hasSideEffects = 0
@@ -665,19 +662,19 @@ def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
// Rotate by 1
def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst)],
IIC_SR>;
def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst)],
IIC_SR>, OpSize16;
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst)],
IIC_SR>, OpSize32;
def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
- [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst)],
IIC_SR>;
} // SchedRW
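The rewritten ROR*m1 patterns rely on the identity rotr(x, 1) == rotl(x, w-1); a small standalone check of that equivalence (rotl32/rotr32 are hypothetical helpers, not LLVM code, valid for shift counts 1-31):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned s) { return (x << s) | (x >> (32 - s)); }
static uint32_t rotr32(uint32_t x, unsigned s) { return (x >> s) | (x << (32 - s)); }

int main() {
  for (uint32_t x : {0u, 1u, 0x80000000u, 0xDEADBEEFu})
    assert(rotr32(x, 1) == rotl32(x, 31));  // rotate-right by 1 == rotate-left by 31
  return 0;
}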
@@ -849,6 +846,15 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
} // Defs = [EFLAGS]
+// Sandy Bridge and newer Intel processors have fast SHLD, so prefer it for
+// rotates to avoid the partial flag update of the normal rotate instructions.
+let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+}
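These patterns rest on SHLD of a register with itself being a rotate-left; a minimal check under that reading (shld32 is a hypothetical model of the instruction, valid for shift counts 1-31):

#include <cassert>
#include <cstdint>

// shld dst, src, cnt: shift dst left by cnt, filling the vacated low bits
// from the high bits of src. With dst == src this is exactly rotl(dst, cnt).
static uint32_t shld32(uint32_t dst, uint32_t src, unsigned cnt) {
  return (dst << cnt) | (src >> (32 - cnt));
}

int main() {
  const uint32_t x = 0xDEADBEEFu;
  for (unsigned s = 1; s < 32; ++s)
    assert(shld32(x, x, s) == ((x << s) | (x >> (32 - s))));  // == rotl(x, s)
  return 0;
}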
+
def ROT32L2R_imm8 : SDNodeXForm<imm, [{
// Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
return getI8Imm(32 - N->getZExtValue(), SDLoc(N));
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 9265d64b3230..2e5350ce979e 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -173,27 +173,28 @@ def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize32;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>;
-
+let mayStore = 1 in {
def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize16;
def MOV32ms : I<0x8C, MRMDestMem, (outs), (ins i32mem:$dst, SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize32;
def MOV64ms : RI<0x8C, MRMDestMem, (outs), (ins i64mem:$dst, SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>;
-
+}
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize16;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize32;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>;
-
+let mayLoad = 1 in {
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize16;
def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize32;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+}
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -202,6 +203,7 @@ def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
+let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize16;
@@ -210,6 +212,7 @@ def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
OpSize16;
// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
+let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize32;
@@ -217,23 +220,27 @@ def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB,
OpSize32;
// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
+let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
+let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB,
OpSize16;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB,
OpSize16;
+let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB,
OpSize32;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB,
OpSize32;
+let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
@@ -248,11 +255,13 @@ def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
"str{l}\t$dst", [], IIC_STR>, TB, OpSize32;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
"str{q}\t$dst", [], IIC_STR>, TB;
+let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst),
"str{w}\t$dst", [], IIC_STR>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
"ltr{w}\t$src", [], IIC_LTR>, TB;
+let mayLoad = 1 in
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
"ltr{w}\t$src", [], IIC_LTR>, TB;
@@ -377,12 +386,14 @@ def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
"verr\t$seg", [], IIC_VERR>, TB;
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
- "verr\t$seg", [], IIC_VERR>, TB;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
"verw\t$seg", [], IIC_VERW_MEM>, TB;
+let mayLoad = 1 in {
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", [], IIC_VERR>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
"verw\t$seg", [], IIC_VERW_REG>, TB;
+}
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -403,6 +414,7 @@ def SIDT64m : I<0x01, MRM1m, (outs), (ins opaque80mem:$dst),
"sidt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
"sldt{w}\t$dst", [], IIC_SLDT>, TB, OpSize16;
+let mayStore = 1 in
def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{w}\t$dst", [], IIC_SLDT>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
@@ -412,6 +424,7 @@ def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
"sldt{q}\t$dst", [], IIC_SLDT>, TB;
+let mayStore = 1 in
def SLDT64m : RI<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{q}\t$dst", [], IIC_SLDT>, TB;
@@ -429,6 +442,7 @@ def LIDT64m : I<0x01, MRM3m, (outs), (ins opaque80mem:$src),
"lidt{q}\t$src", [], IIC_LIDT>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
+let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
} // SchedRW
@@ -459,6 +473,7 @@ def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
"lmsw{w}\t$src", [], IIC_LMSW_MEM>, TB;
+let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index 7267d752653e..38ac8be94483 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -25,9 +25,9 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
- "xbegin\t$dst", []>, OpSize16, Requires<[HasRTM]>;
+ "xbegin\t$dst", []>, OpSize16;
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
- "xbegin\t$dst", []>, OpSize32, Requires<[HasRTM]>;
+ "xbegin\t$dst", []>, OpSize32;
}
def XEND : I<0x01, MRM_D5, (outs), (ins),
@@ -35,7 +35,7 @@ def XEND : I<0x01, MRM_D5, (outs), (ins),
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
- "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>;
+ "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
@@ -44,7 +44,7 @@ def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
// HLE prefixes
let isAsmParserOnly = 1 in {
-def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>, Requires<[HasHLE]>;
-def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>, Requires<[HasHLE]>;
+def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
+def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
}
diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h
deleted file mode 100755
index 415a891bfd97..000000000000
--- a/lib/Target/X86/X86InstrTablesInfo.h
+++ /dev/null
@@ -1,1162 +0,0 @@
-//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains related X86 Instruction Information Tables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H
-#define LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H
-
-using namespace llvm;
-
-struct X86EvexToVexCompressTableEntry {
- uint16_t EvexOpcode;
- uint16_t VexOpcode;
-};
-
-
-
-// X86 EVEX encoded instructions that have a VEX 128 encoding
-// (table format: <EVEX opcode, VEX-128 opcode>).
-static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
- // EVEX scalar with corresponding VEX.
- { X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm },
- { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr },
- { X86::Int_VCOMISSZrm , X86::Int_VCOMISSrm },
- { X86::Int_VCOMISSZrr , X86::Int_VCOMISSrr },
- { X86::Int_VUCOMISDZrm , X86::Int_VUCOMISDrm },
- { X86::Int_VUCOMISDZrr , X86::Int_VUCOMISDrr },
- { X86::Int_VUCOMISSZrm , X86::Int_VUCOMISSrm },
- { X86::Int_VUCOMISSZrr , X86::Int_VUCOMISSrr },
- { X86::VADDSDZrm , X86::VADDSDrm },
- { X86::VADDSDZrm_Int , X86::VADDSDrm_Int },
- { X86::VADDSDZrr , X86::VADDSDrr },
- { X86::VADDSDZrr_Int , X86::VADDSDrr_Int },
- { X86::VADDSSZrm , X86::VADDSSrm },
- { X86::VADDSSZrm_Int , X86::VADDSSrm_Int },
- { X86::VADDSSZrr , X86::VADDSSrr },
- { X86::VADDSSZrr_Int , X86::VADDSSrr_Int },
- { X86::VCOMISDZrm , X86::VCOMISDrm },
- { X86::VCOMISDZrr , X86::VCOMISDrr },
- { X86::VCOMISSZrm , X86::VCOMISSrm },
- { X86::VCOMISSZrr , X86::VCOMISSrr },
- { X86::VCVTSD2SI64Zrm , X86::VCVTSD2SI64rm },
- { X86::VCVTSD2SI64Zrr , X86::VCVTSD2SI64rr },
- { X86::VCVTSD2SIZrm , X86::VCVTSD2SIrm },
- { X86::VCVTSD2SIZrr , X86::VCVTSD2SIrr },
- { X86::VCVTSD2SSZrm , X86::VCVTSD2SSrm },
- { X86::VCVTSD2SSZrr , X86::VCVTSD2SSrr },
- { X86::VCVTSI2SDZrm , X86::VCVTSI2SDrm },
- { X86::VCVTSI2SDZrm_Int , X86::Int_VCVTSI2SDrm },
- { X86::VCVTSI2SDZrr , X86::VCVTSI2SDrr },
- { X86::VCVTSI2SDZrr_Int , X86::Int_VCVTSI2SDrr },
- { X86::VCVTSI2SSZrm , X86::VCVTSI2SSrm },
- { X86::VCVTSI2SSZrm_Int , X86::Int_VCVTSI2SSrm },
- { X86::VCVTSI2SSZrr , X86::VCVTSI2SSrr },
- { X86::VCVTSI2SSZrr_Int , X86::Int_VCVTSI2SSrr },
- { X86::VCVTSS2SDZrm , X86::VCVTSS2SDrm },
- { X86::VCVTSS2SDZrr , X86::VCVTSS2SDrr },
- { X86::VCVTSS2SI64Zrm , X86::VCVTSS2SI64rm },
- { X86::VCVTSS2SI64Zrr , X86::VCVTSS2SI64rr },
- { X86::VCVTSS2SIZrm , X86::VCVTSS2SIrm },
- { X86::VCVTSS2SIZrr , X86::VCVTSS2SIrr },
- { X86::VCVTTSD2SI64Zrm , X86::VCVTTSD2SI64rm },
- { X86::VCVTTSD2SI64Zrm_Int , X86::Int_VCVTTSD2SI64rm },
- { X86::VCVTTSD2SI64Zrr , X86::VCVTTSD2SI64rr },
- { X86::VCVTTSD2SI64Zrr_Int , X86::Int_VCVTTSD2SI64rr },
- { X86::VCVTTSD2SIZrm , X86::VCVTTSD2SIrm },
- { X86::VCVTTSD2SIZrm_Int , X86::Int_VCVTTSD2SIrm },
- { X86::VCVTTSD2SIZrr , X86::VCVTTSD2SIrr },
- { X86::VCVTTSD2SIZrr_Int , X86::Int_VCVTTSD2SIrr },
- { X86::VCVTTSS2SI64Zrm , X86::VCVTTSS2SI64rm },
- { X86::VCVTTSS2SI64Zrm_Int , X86::Int_VCVTTSS2SI64rm },
- { X86::VCVTTSS2SI64Zrr , X86::VCVTTSS2SI64rr },
- { X86::VCVTTSS2SI64Zrr_Int , X86::Int_VCVTTSS2SI64rr },
- { X86::VCVTTSS2SIZrm , X86::VCVTTSS2SIrm },
- { X86::VCVTTSS2SIZrm_Int , X86::Int_VCVTTSS2SIrm },
- { X86::VCVTTSS2SIZrr , X86::VCVTTSS2SIrr },
- { X86::VCVTTSS2SIZrr_Int , X86::Int_VCVTTSS2SIrr },
- { X86::VDIVSDZrm , X86::VDIVSDrm },
- { X86::VDIVSDZrm_Int , X86::VDIVSDrm_Int },
- { X86::VDIVSDZrr , X86::VDIVSDrr },
- { X86::VDIVSDZrr_Int , X86::VDIVSDrr_Int },
- { X86::VDIVSSZrm , X86::VDIVSSrm },
- { X86::VDIVSSZrm_Int , X86::VDIVSSrm_Int },
- { X86::VDIVSSZrr , X86::VDIVSSrr },
- { X86::VDIVSSZrr_Int , X86::VDIVSSrr_Int },
- { X86::VFMADD132SDZm , X86::VFMADD132SDm },
- { X86::VFMADD132SDZm_Int , X86::VFMADD132SDm_Int },
- { X86::VFMADD132SDZr , X86::VFMADD132SDr },
- { X86::VFMADD132SDZr_Int , X86::VFMADD132SDr_Int },
- { X86::VFMADD132SSZm , X86::VFMADD132SSm },
- { X86::VFMADD132SSZm_Int , X86::VFMADD132SSm_Int },
- { X86::VFMADD132SSZr , X86::VFMADD132SSr },
- { X86::VFMADD132SSZr_Int , X86::VFMADD132SSr_Int },
- { X86::VFMADD213SDZm , X86::VFMADD213SDm },
- { X86::VFMADD213SDZm_Int , X86::VFMADD213SDm_Int },
- { X86::VFMADD213SDZr , X86::VFMADD213SDr },
- { X86::VFMADD213SDZr_Int , X86::VFMADD213SDr_Int },
- { X86::VFMADD213SSZm , X86::VFMADD213SSm },
- { X86::VFMADD213SSZm_Int , X86::VFMADD213SSm_Int },
- { X86::VFMADD213SSZr , X86::VFMADD213SSr },
- { X86::VFMADD213SSZr_Int , X86::VFMADD213SSr_Int },
- { X86::VFMADD231SDZm , X86::VFMADD231SDm },
- { X86::VFMADD231SDZm_Int , X86::VFMADD231SDm_Int },
- { X86::VFMADD231SDZr , X86::VFMADD231SDr },
- { X86::VFMADD231SDZr_Int , X86::VFMADD231SDr_Int },
- { X86::VFMADD231SSZm , X86::VFMADD231SSm },
- { X86::VFMADD231SSZm_Int , X86::VFMADD231SSm_Int },
- { X86::VFMADD231SSZr , X86::VFMADD231SSr },
- { X86::VFMADD231SSZr_Int , X86::VFMADD231SSr_Int },
- { X86::VFMSUB132SDZm , X86::VFMSUB132SDm },
- { X86::VFMSUB132SDZm_Int , X86::VFMSUB132SDm_Int },
- { X86::VFMSUB132SDZr , X86::VFMSUB132SDr },
- { X86::VFMSUB132SDZr_Int , X86::VFMSUB132SDr_Int },
- { X86::VFMSUB132SSZm , X86::VFMSUB132SSm },
- { X86::VFMSUB132SSZm_Int , X86::VFMSUB132SSm_Int },
- { X86::VFMSUB132SSZr , X86::VFMSUB132SSr },
- { X86::VFMSUB132SSZr_Int , X86::VFMSUB132SSr_Int },
- { X86::VFMSUB213SDZm , X86::VFMSUB213SDm },
- { X86::VFMSUB213SDZm_Int , X86::VFMSUB213SDm_Int },
- { X86::VFMSUB213SDZr , X86::VFMSUB213SDr },
- { X86::VFMSUB213SDZr_Int , X86::VFMSUB213SDr_Int },
- { X86::VFMSUB213SSZm , X86::VFMSUB213SSm },
- { X86::VFMSUB213SSZm_Int , X86::VFMSUB213SSm_Int },
- { X86::VFMSUB213SSZr , X86::VFMSUB213SSr },
- { X86::VFMSUB213SSZr_Int , X86::VFMSUB213SSr_Int },
- { X86::VFMSUB231SDZm , X86::VFMSUB231SDm },
- { X86::VFMSUB231SDZm_Int , X86::VFMSUB231SDm_Int },
- { X86::VFMSUB231SDZr , X86::VFMSUB231SDr },
- { X86::VFMSUB231SDZr_Int , X86::VFMSUB231SDr_Int },
- { X86::VFMSUB231SSZm , X86::VFMSUB231SSm },
- { X86::VFMSUB231SSZm_Int , X86::VFMSUB231SSm_Int },
- { X86::VFMSUB231SSZr , X86::VFMSUB231SSr },
- { X86::VFMSUB231SSZr_Int , X86::VFMSUB231SSr_Int },
- { X86::VFNMADD132SDZm , X86::VFNMADD132SDm },
- { X86::VFNMADD132SDZm_Int , X86::VFNMADD132SDm_Int },
- { X86::VFNMADD132SDZr , X86::VFNMADD132SDr },
- { X86::VFNMADD132SDZr_Int , X86::VFNMADD132SDr_Int },
- { X86::VFNMADD132SSZm , X86::VFNMADD132SSm },
- { X86::VFNMADD132SSZm_Int , X86::VFNMADD132SSm_Int },
- { X86::VFNMADD132SSZr , X86::VFNMADD132SSr },
- { X86::VFNMADD132SSZr_Int , X86::VFNMADD132SSr_Int },
- { X86::VFNMADD213SDZm , X86::VFNMADD213SDm },
- { X86::VFNMADD213SDZm_Int , X86::VFNMADD213SDm_Int },
- { X86::VFNMADD213SDZr , X86::VFNMADD213SDr },
- { X86::VFNMADD213SDZr_Int , X86::VFNMADD213SDr_Int },
- { X86::VFNMADD213SSZm , X86::VFNMADD213SSm },
- { X86::VFNMADD213SSZm_Int , X86::VFNMADD213SSm_Int },
- { X86::VFNMADD213SSZr , X86::VFNMADD213SSr },
- { X86::VFNMADD213SSZr_Int , X86::VFNMADD213SSr_Int },
- { X86::VFNMADD231SDZm , X86::VFNMADD231SDm },
- { X86::VFNMADD231SDZm_Int , X86::VFNMADD231SDm_Int },
- { X86::VFNMADD231SDZr , X86::VFNMADD231SDr },
- { X86::VFNMADD231SDZr_Int , X86::VFNMADD231SDr_Int },
- { X86::VFNMADD231SSZm , X86::VFNMADD231SSm },
- { X86::VFNMADD231SSZm_Int , X86::VFNMADD231SSm_Int },
- { X86::VFNMADD231SSZr , X86::VFNMADD231SSr },
- { X86::VFNMADD231SSZr_Int , X86::VFNMADD231SSr_Int },
- { X86::VFNMSUB132SDZm , X86::VFNMSUB132SDm },
- { X86::VFNMSUB132SDZm_Int , X86::VFNMSUB132SDm_Int },
- { X86::VFNMSUB132SDZr , X86::VFNMSUB132SDr },
- { X86::VFNMSUB132SDZr_Int , X86::VFNMSUB132SDr_Int },
- { X86::VFNMSUB132SSZm , X86::VFNMSUB132SSm },
- { X86::VFNMSUB132SSZm_Int , X86::VFNMSUB132SSm_Int },
- { X86::VFNMSUB132SSZr , X86::VFNMSUB132SSr },
- { X86::VFNMSUB132SSZr_Int , X86::VFNMSUB132SSr_Int },
- { X86::VFNMSUB213SDZm , X86::VFNMSUB213SDm },
- { X86::VFNMSUB213SDZm_Int , X86::VFNMSUB213SDm_Int },
- { X86::VFNMSUB213SDZr , X86::VFNMSUB213SDr },
- { X86::VFNMSUB213SDZr_Int , X86::VFNMSUB213SDr_Int },
- { X86::VFNMSUB213SSZm , X86::VFNMSUB213SSm },
- { X86::VFNMSUB213SSZm_Int , X86::VFNMSUB213SSm_Int },
- { X86::VFNMSUB213SSZr , X86::VFNMSUB213SSr },
- { X86::VFNMSUB213SSZr_Int , X86::VFNMSUB213SSr_Int },
- { X86::VFNMSUB231SDZm , X86::VFNMSUB231SDm },
- { X86::VFNMSUB231SDZm_Int , X86::VFNMSUB231SDm_Int },
- { X86::VFNMSUB231SDZr , X86::VFNMSUB231SDr },
- { X86::VFNMSUB231SDZr_Int , X86::VFNMSUB231SDr_Int },
- { X86::VFNMSUB231SSZm , X86::VFNMSUB231SSm },
- { X86::VFNMSUB231SSZm_Int , X86::VFNMSUB231SSm_Int },
- { X86::VFNMSUB231SSZr , X86::VFNMSUB231SSr },
- { X86::VFNMSUB231SSZr_Int , X86::VFNMSUB231SSr_Int },
- { X86::VMAXCSDZrm , X86::VMAXCSDrm },
- { X86::VMAXCSDZrr , X86::VMAXCSDrr },
- { X86::VMAXCSSZrm , X86::VMAXCSSrm },
- { X86::VMAXCSSZrr , X86::VMAXCSSrr },
- { X86::VMAXSDZrm , X86::VMAXSDrm },
- { X86::VMAXSDZrm_Int , X86::VMAXSDrm_Int },
- { X86::VMAXSDZrr , X86::VMAXSDrr },
- { X86::VMAXSDZrr_Int , X86::VMAXSDrr_Int },
- { X86::VMAXSSZrm , X86::VMAXSSrm },
- { X86::VMAXSSZrm_Int , X86::VMAXSSrm_Int },
- { X86::VMAXSSZrr , X86::VMAXSSrr },
- { X86::VMAXSSZrr_Int , X86::VMAXSSrr_Int },
- { X86::VMINCSDZrm , X86::VMINCSDrm },
- { X86::VMINCSDZrr , X86::VMINCSDrr },
- { X86::VMINCSSZrm , X86::VMINCSSrm },
- { X86::VMINCSSZrr , X86::VMINCSSrr },
- { X86::VMINSDZrm , X86::VMINSDrm },
- { X86::VMINSDZrm_Int , X86::VMINSDrm_Int },
- { X86::VMINSDZrr , X86::VMINSDrr },
- { X86::VMINSDZrr_Int , X86::VMINSDrr_Int },
- { X86::VMINSSZrm , X86::VMINSSrm },
- { X86::VMINSSZrm_Int , X86::VMINSSrm_Int },
- { X86::VMINSSZrr , X86::VMINSSrr },
- { X86::VMINSSZrr_Int , X86::VMINSSrr_Int },
- { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
- { X86::VMOVDI2SSZrm , X86::VMOVDI2SSrm },
- { X86::VMOVDI2SSZrr , X86::VMOVDI2SSrr },
- { X86::VMOVSDZmr , X86::VMOVSDmr },
- { X86::VMOVSDZrm , X86::VMOVSDrm },
- { X86::VMOVSDZrr , X86::VMOVSDrr },
- { X86::VMOVSSZmr , X86::VMOVSSmr },
- { X86::VMOVSSZrm , X86::VMOVSSrm },
- { X86::VMOVSSZrr , X86::VMOVSSrr },
- { X86::VMOVSSZrr_REV , X86::VMOVSSrr_REV },
- { X86::VMULSDZrm , X86::VMULSDrm },
- { X86::VMULSDZrm_Int , X86::VMULSDrm_Int },
- { X86::VMULSDZrr , X86::VMULSDrr },
- { X86::VMULSDZrr_Int , X86::VMULSDrr_Int },
- { X86::VMULSSZrm , X86::VMULSSrm },
- { X86::VMULSSZrm_Int , X86::VMULSSrm_Int },
- { X86::VMULSSZrr , X86::VMULSSrr },
- { X86::VMULSSZrr_Int , X86::VMULSSrr_Int },
- { X86::VSQRTSDZm , X86::VSQRTSDm },
- { X86::VSQRTSDZm_Int , X86::VSQRTSDm_Int },
- { X86::VSQRTSDZr , X86::VSQRTSDr },
- { X86::VSQRTSDZr_Int , X86::VSQRTSDr_Int },
- { X86::VSQRTSSZm , X86::VSQRTSSm },
- { X86::VSQRTSSZm_Int , X86::VSQRTSSm_Int },
- { X86::VSQRTSSZr , X86::VSQRTSSr },
- { X86::VSQRTSSZr_Int , X86::VSQRTSSr_Int },
- { X86::VSUBSDZrm , X86::VSUBSDrm },
- { X86::VSUBSDZrm_Int , X86::VSUBSDrm_Int },
- { X86::VSUBSDZrr , X86::VSUBSDrr },
- { X86::VSUBSDZrr_Int , X86::VSUBSDrr_Int },
- { X86::VSUBSSZrm , X86::VSUBSSrm },
- { X86::VSUBSSZrm_Int , X86::VSUBSSrm_Int },
- { X86::VSUBSSZrr , X86::VSUBSSrr },
- { X86::VSUBSSZrr_Int , X86::VSUBSSrr_Int },
- { X86::VUCOMISDZrm , X86::VUCOMISDrm },
- { X86::VUCOMISDZrr , X86::VUCOMISDrr },
- { X86::VUCOMISSZrm , X86::VUCOMISSrm },
- { X86::VUCOMISSZrr , X86::VUCOMISSrr },
-
- { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
- { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
- { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
- { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr },
- { X86::VMOVLHPSZrr , X86::VMOVLHPSrr },
- { X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
- { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
- { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
- { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
- { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
- { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
- { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
-
- { X86::VPEXTRBZmr , X86::VPEXTRBmr },
- { X86::VPEXTRBZrr , X86::VPEXTRBrr },
- { X86::VPEXTRDZmr , X86::VPEXTRDmr },
- { X86::VPEXTRDZrr , X86::VPEXTRDrr },
- { X86::VPEXTRQZmr , X86::VPEXTRQmr },
- { X86::VPEXTRQZrr , X86::VPEXTRQrr },
- { X86::VPEXTRWZmr , X86::VPEXTRWmr },
- { X86::VPEXTRWZrr , X86::VPEXTRWri },
-
- { X86::VPINSRBZrm , X86::VPINSRBrm },
- { X86::VPINSRBZrr , X86::VPINSRBrr },
- { X86::VPINSRDZrm , X86::VPINSRDrm },
- { X86::VPINSRDZrr , X86::VPINSRDrr },
- { X86::VPINSRQZrm , X86::VPINSRQrm },
- { X86::VPINSRQZrr , X86::VPINSRQrr },
- { X86::VPINSRWZrm , X86::VPINSRWrmi },
- { X86::VPINSRWZrr , X86::VPINSRWrri },
-
- // EVEX 128 with corresponding VEX.
- { X86::VADDPDZ128rm , X86::VADDPDrm },
- { X86::VADDPDZ128rr , X86::VADDPDrr },
- { X86::VADDPSZ128rm , X86::VADDPSrm },
- { X86::VADDPSZ128rr , X86::VADDPSrr },
- { X86::VANDNPDZ128rm , X86::VANDNPDrm },
- { X86::VANDNPDZ128rr , X86::VANDNPDrr },
- { X86::VANDNPSZ128rm , X86::VANDNPSrm },
- { X86::VANDNPSZ128rr , X86::VANDNPSrr },
- { X86::VANDPDZ128rm , X86::VANDPDrm },
- { X86::VANDPDZ128rr , X86::VANDPDrr },
- { X86::VANDPSZ128rm , X86::VANDPSrm },
- { X86::VANDPSZ128rr , X86::VANDPSrr },
- { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
- { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
- { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
- { X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm },
- { X86::VCVTDQ2PDZ128rr , X86::VCVTDQ2PDrr },
- { X86::VCVTDQ2PSZ128rm , X86::VCVTDQ2PSrm },
- { X86::VCVTDQ2PSZ128rr , X86::VCVTDQ2PSrr },
- { X86::VCVTPD2DQZ128rm , X86::VCVTPD2DQrm },
- { X86::VCVTPD2DQZ128rr , X86::VCVTPD2DQrr },
- { X86::VCVTPD2PSZ128rm , X86::VCVTPD2PSrm },
- { X86::VCVTPD2PSZ128rr , X86::VCVTPD2PSrr },
- { X86::VCVTPH2PSZ128rm , X86::VCVTPH2PSrm },
- { X86::VCVTPH2PSZ128rr , X86::VCVTPH2PSrr },
- { X86::VCVTPS2DQZ128rm , X86::VCVTPS2DQrm },
- { X86::VCVTPS2DQZ128rr , X86::VCVTPS2DQrr },
- { X86::VCVTPS2PDZ128rm , X86::VCVTPS2PDrm },
- { X86::VCVTPS2PDZ128rr , X86::VCVTPS2PDrr },
- { X86::VCVTPS2PHZ128mr , X86::VCVTPS2PHmr },
- { X86::VCVTPS2PHZ128rr , X86::VCVTPS2PHrr },
- { X86::VCVTTPD2DQZ128rm , X86::VCVTTPD2DQrm },
- { X86::VCVTTPD2DQZ128rr , X86::VCVTTPD2DQrr },
- { X86::VCVTTPS2DQZ128rm , X86::VCVTTPS2DQrm },
- { X86::VCVTTPS2DQZ128rr , X86::VCVTTPS2DQrr },
- { X86::VDIVPDZ128rm , X86::VDIVPDrm },
- { X86::VDIVPDZ128rr , X86::VDIVPDrr },
- { X86::VDIVPSZ128rm , X86::VDIVPSrm },
- { X86::VDIVPSZ128rr , X86::VDIVPSrr },
- { X86::VFMADD132PDZ128m , X86::VFMADD132PDm },
- { X86::VFMADD132PDZ128r , X86::VFMADD132PDr },
- { X86::VFMADD132PSZ128m , X86::VFMADD132PSm },
- { X86::VFMADD132PSZ128r , X86::VFMADD132PSr },
- { X86::VFMADD213PDZ128m , X86::VFMADD213PDm },
- { X86::VFMADD213PDZ128r , X86::VFMADD213PDr },
- { X86::VFMADD213PSZ128m , X86::VFMADD213PSm },
- { X86::VFMADD213PSZ128r , X86::VFMADD213PSr },
- { X86::VFMADD231PDZ128m , X86::VFMADD231PDm },
- { X86::VFMADD231PDZ128r , X86::VFMADD231PDr },
- { X86::VFMADD231PSZ128m , X86::VFMADD231PSm },
- { X86::VFMADD231PSZ128r , X86::VFMADD231PSr },
- { X86::VFMADDSUB132PDZ128m , X86::VFMADDSUB132PDm },
- { X86::VFMADDSUB132PDZ128r , X86::VFMADDSUB132PDr },
- { X86::VFMADDSUB132PSZ128m , X86::VFMADDSUB132PSm },
- { X86::VFMADDSUB132PSZ128r , X86::VFMADDSUB132PSr },
- { X86::VFMADDSUB213PDZ128m , X86::VFMADDSUB213PDm },
- { X86::VFMADDSUB213PDZ128r , X86::VFMADDSUB213PDr },
- { X86::VFMADDSUB213PSZ128m , X86::VFMADDSUB213PSm },
- { X86::VFMADDSUB213PSZ128r , X86::VFMADDSUB213PSr },
- { X86::VFMADDSUB231PDZ128m , X86::VFMADDSUB231PDm },
- { X86::VFMADDSUB231PDZ128r , X86::VFMADDSUB231PDr },
- { X86::VFMADDSUB231PSZ128m , X86::VFMADDSUB231PSm },
- { X86::VFMADDSUB231PSZ128r , X86::VFMADDSUB231PSr },
- { X86::VFMSUB132PDZ128m , X86::VFMSUB132PDm },
- { X86::VFMSUB132PDZ128r , X86::VFMSUB132PDr },
- { X86::VFMSUB132PSZ128m , X86::VFMSUB132PSm },
- { X86::VFMSUB132PSZ128r , X86::VFMSUB132PSr },
- { X86::VFMSUB213PDZ128m , X86::VFMSUB213PDm },
- { X86::VFMSUB213PDZ128r , X86::VFMSUB213PDr },
- { X86::VFMSUB213PSZ128m , X86::VFMSUB213PSm },
- { X86::VFMSUB213PSZ128r , X86::VFMSUB213PSr },
- { X86::VFMSUB231PDZ128m , X86::VFMSUB231PDm },
- { X86::VFMSUB231PDZ128r , X86::VFMSUB231PDr },
- { X86::VFMSUB231PSZ128m , X86::VFMSUB231PSm },
- { X86::VFMSUB231PSZ128r , X86::VFMSUB231PSr },
- { X86::VFMSUBADD132PDZ128m , X86::VFMSUBADD132PDm },
- { X86::VFMSUBADD132PDZ128r , X86::VFMSUBADD132PDr },
- { X86::VFMSUBADD132PSZ128m , X86::VFMSUBADD132PSm },
- { X86::VFMSUBADD132PSZ128r , X86::VFMSUBADD132PSr },
- { X86::VFMSUBADD213PDZ128m , X86::VFMSUBADD213PDm },
- { X86::VFMSUBADD213PDZ128r , X86::VFMSUBADD213PDr },
- { X86::VFMSUBADD213PSZ128m , X86::VFMSUBADD213PSm },
- { X86::VFMSUBADD213PSZ128r , X86::VFMSUBADD213PSr },
- { X86::VFMSUBADD231PDZ128m , X86::VFMSUBADD231PDm },
- { X86::VFMSUBADD231PDZ128r , X86::VFMSUBADD231PDr },
- { X86::VFMSUBADD231PSZ128m , X86::VFMSUBADD231PSm },
- { X86::VFMSUBADD231PSZ128r , X86::VFMSUBADD231PSr },
- { X86::VFNMADD132PDZ128m , X86::VFNMADD132PDm },
- { X86::VFNMADD132PDZ128r , X86::VFNMADD132PDr },
- { X86::VFNMADD132PSZ128m , X86::VFNMADD132PSm },
- { X86::VFNMADD132PSZ128r , X86::VFNMADD132PSr },
- { X86::VFNMADD213PDZ128m , X86::VFNMADD213PDm },
- { X86::VFNMADD213PDZ128r , X86::VFNMADD213PDr },
- { X86::VFNMADD213PSZ128m , X86::VFNMADD213PSm },
- { X86::VFNMADD213PSZ128r , X86::VFNMADD213PSr },
- { X86::VFNMADD231PDZ128m , X86::VFNMADD231PDm },
- { X86::VFNMADD231PDZ128r , X86::VFNMADD231PDr },
- { X86::VFNMADD231PSZ128m , X86::VFNMADD231PSm },
- { X86::VFNMADD231PSZ128r , X86::VFNMADD231PSr },
- { X86::VFNMSUB132PDZ128m , X86::VFNMSUB132PDm },
- { X86::VFNMSUB132PDZ128r , X86::VFNMSUB132PDr },
- { X86::VFNMSUB132PSZ128m , X86::VFNMSUB132PSm },
- { X86::VFNMSUB132PSZ128r , X86::VFNMSUB132PSr },
- { X86::VFNMSUB213PDZ128m , X86::VFNMSUB213PDm },
- { X86::VFNMSUB213PDZ128r , X86::VFNMSUB213PDr },
- { X86::VFNMSUB213PSZ128m , X86::VFNMSUB213PSm },
- { X86::VFNMSUB213PSZ128r , X86::VFNMSUB213PSr },
- { X86::VFNMSUB231PDZ128m , X86::VFNMSUB231PDm },
- { X86::VFNMSUB231PDZ128r , X86::VFNMSUB231PDr },
- { X86::VFNMSUB231PSZ128m , X86::VFNMSUB231PSm },
- { X86::VFNMSUB231PSZ128r , X86::VFNMSUB231PSr },
- { X86::VMAXCPDZ128rm , X86::VMAXCPDrm },
- { X86::VMAXCPDZ128rr , X86::VMAXCPDrr },
- { X86::VMAXCPSZ128rm , X86::VMAXCPSrm },
- { X86::VMAXCPSZ128rr , X86::VMAXCPSrr },
- { X86::VMAXPDZ128rm , X86::VMAXPDrm },
- { X86::VMAXPDZ128rr , X86::VMAXPDrr },
- { X86::VMAXPSZ128rm , X86::VMAXPSrm },
- { X86::VMAXPSZ128rr , X86::VMAXPSrr },
- { X86::VMINCPDZ128rm , X86::VMINCPDrm },
- { X86::VMINCPDZ128rr , X86::VMINCPDrr },
- { X86::VMINCPSZ128rm , X86::VMINCPSrm },
- { X86::VMINCPSZ128rr , X86::VMINCPSrr },
- { X86::VMINPDZ128rm , X86::VMINPDrm },
- { X86::VMINPDZ128rr , X86::VMINPDrr },
- { X86::VMINPSZ128rm , X86::VMINPSrm },
- { X86::VMINPSZ128rr , X86::VMINPSrr },
- { X86::VMOVAPDZ128mr , X86::VMOVAPDmr },
- { X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
- { X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
- { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV },
- { X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
- { X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
- { X86::VMOVAPSZ128rr , X86::VMOVAPSrr },
- { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV },
- { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm },
- { X86::VMOVDDUPZ128rr , X86::VMOVDDUPrr },
- { X86::VMOVDQA32Z128mr , X86::VMOVDQAmr },
- { X86::VMOVDQA32Z128rm , X86::VMOVDQArm },
- { X86::VMOVDQA32Z128rr , X86::VMOVDQArr },
- { X86::VMOVDQA32Z128rr_REV , X86::VMOVDQArr_REV },
- { X86::VMOVDQA64Z128mr , X86::VMOVDQAmr },
- { X86::VMOVDQA64Z128rm , X86::VMOVDQArm },
- { X86::VMOVDQA64Z128rr , X86::VMOVDQArr },
- { X86::VMOVDQA64Z128rr_REV , X86::VMOVDQArr_REV },
- { X86::VMOVDQU16Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU16Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU16Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU16Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVDQU32Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU32Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU32Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU32Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVDQU64Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU64Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU64Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU64Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVDQU8Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU8Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU8Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU8Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVHPDZ128mr , X86::VMOVHPDmr },
- { X86::VMOVHPDZ128rm , X86::VMOVHPDrm },
- { X86::VMOVHPSZ128mr , X86::VMOVHPSmr },
- { X86::VMOVHPSZ128rm , X86::VMOVHPSrm },
- { X86::VMOVLPDZ128mr , X86::VMOVLPDmr },
- { X86::VMOVLPDZ128rm , X86::VMOVLPDrm },
- { X86::VMOVLPSZ128mr , X86::VMOVLPSmr },
- { X86::VMOVLPSZ128rm , X86::VMOVLPSrm },
- { X86::VMOVNTDQAZ128rm , X86::VMOVNTDQArm },
- { X86::VMOVNTDQZ128mr , X86::VMOVNTDQmr },
- { X86::VMOVNTPDZ128mr , X86::VMOVNTPDmr },
- { X86::VMOVNTPSZ128mr , X86::VMOVNTPSmr },
- { X86::VMOVSHDUPZ128rm , X86::VMOVSHDUPrm },
- { X86::VMOVSHDUPZ128rr , X86::VMOVSHDUPrr },
- { X86::VMOVSLDUPZ128rm , X86::VMOVSLDUPrm },
- { X86::VMOVSLDUPZ128rr , X86::VMOVSLDUPrr },
- { X86::VMOVUPDZ128mr , X86::VMOVUPDmr },
- { X86::VMOVUPDZ128rm , X86::VMOVUPDrm },
- { X86::VMOVUPDZ128rr , X86::VMOVUPDrr },
- { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV },
- { X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
- { X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
- { X86::VMOVUPSZ128rr , X86::VMOVUPSrr },
- { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV },
- { X86::VMULPDZ128rm , X86::VMULPDrm },
- { X86::VMULPDZ128rr , X86::VMULPDrr },
- { X86::VMULPSZ128rm , X86::VMULPSrm },
- { X86::VMULPSZ128rr , X86::VMULPSrr },
- { X86::VORPDZ128rm , X86::VORPDrm },
- { X86::VORPDZ128rr , X86::VORPDrr },
- { X86::VORPSZ128rm , X86::VORPSrm },
- { X86::VORPSZ128rr , X86::VORPSrr },
- { X86::VPABSBZ128rm , X86::VPABSBrm },
- { X86::VPABSBZ128rr , X86::VPABSBrr },
- { X86::VPABSDZ128rm , X86::VPABSDrm },
- { X86::VPABSDZ128rr , X86::VPABSDrr },
- { X86::VPABSWZ128rm , X86::VPABSWrm },
- { X86::VPABSWZ128rr , X86::VPABSWrr },
- { X86::VPACKSSDWZ128rm , X86::VPACKSSDWrm },
- { X86::VPACKSSDWZ128rr , X86::VPACKSSDWrr },
- { X86::VPACKSSWBZ128rm , X86::VPACKSSWBrm },
- { X86::VPACKSSWBZ128rr , X86::VPACKSSWBrr },
- { X86::VPACKUSDWZ128rm , X86::VPACKUSDWrm },
- { X86::VPACKUSDWZ128rr , X86::VPACKUSDWrr },
- { X86::VPACKUSWBZ128rm , X86::VPACKUSWBrm },
- { X86::VPACKUSWBZ128rr , X86::VPACKUSWBrr },
- { X86::VPADDBZ128rm , X86::VPADDBrm },
- { X86::VPADDBZ128rr , X86::VPADDBrr },
- { X86::VPADDDZ128rm , X86::VPADDDrm },
- { X86::VPADDDZ128rr , X86::VPADDDrr },
- { X86::VPADDQZ128rm , X86::VPADDQrm },
- { X86::VPADDQZ128rr , X86::VPADDQrr },
- { X86::VPADDSBZ128rm , X86::VPADDSBrm },
- { X86::VPADDSBZ128rr , X86::VPADDSBrr },
- { X86::VPADDSWZ128rm , X86::VPADDSWrm },
- { X86::VPADDSWZ128rr , X86::VPADDSWrr },
- { X86::VPADDUSBZ128rm , X86::VPADDUSBrm },
- { X86::VPADDUSBZ128rr , X86::VPADDUSBrr },
- { X86::VPADDUSWZ128rm , X86::VPADDUSWrm },
- { X86::VPADDUSWZ128rr , X86::VPADDUSWrr },
- { X86::VPADDWZ128rm , X86::VPADDWrm },
- { X86::VPADDWZ128rr , X86::VPADDWrr },
- { X86::VPALIGNRZ128rmi , X86::VPALIGNRrmi },
- { X86::VPALIGNRZ128rri , X86::VPALIGNRrri },
- { X86::VPANDDZ128rm , X86::VPANDrm },
- { X86::VPANDDZ128rr , X86::VPANDrr },
- { X86::VPANDQZ128rm , X86::VPANDrm },
- { X86::VPANDQZ128rr , X86::VPANDrr },
- { X86::VPAVGBZ128rm , X86::VPAVGBrm },
- { X86::VPAVGBZ128rr , X86::VPAVGBrr },
- { X86::VPAVGWZ128rm , X86::VPAVGWrm },
- { X86::VPAVGWZ128rr , X86::VPAVGWrr },
- { X86::VPBROADCASTBZ128m , X86::VPBROADCASTBrm },
- { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr },
- { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm },
- { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr },
- { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
- { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
- { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
- { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr },
- { X86::VPERMILPDZ128mi , X86::VPERMILPDmi },
- { X86::VPERMILPDZ128ri , X86::VPERMILPDri },
- { X86::VPERMILPDZ128rm , X86::VPERMILPDrm },
- { X86::VPERMILPDZ128rr , X86::VPERMILPDrr },
- { X86::VPERMILPSZ128mi , X86::VPERMILPSmi },
- { X86::VPERMILPSZ128ri , X86::VPERMILPSri },
- { X86::VPERMILPSZ128rm , X86::VPERMILPSrm },
- { X86::VPERMILPSZ128rr , X86::VPERMILPSrr },
- { X86::VPMADDUBSWZ128rm , X86::VPMADDUBSWrm },
- { X86::VPMADDUBSWZ128rr , X86::VPMADDUBSWrr },
- { X86::VPMADDWDZ128rm , X86::VPMADDWDrm },
- { X86::VPMADDWDZ128rr , X86::VPMADDWDrr },
- { X86::VPMAXSBZ128rm , X86::VPMAXSBrm },
- { X86::VPMAXSBZ128rr , X86::VPMAXSBrr },
- { X86::VPMAXSDZ128rm , X86::VPMAXSDrm },
- { X86::VPMAXSDZ128rr , X86::VPMAXSDrr },
- { X86::VPMAXSWZ128rm , X86::VPMAXSWrm },
- { X86::VPMAXSWZ128rr , X86::VPMAXSWrr },
- { X86::VPMAXUBZ128rm , X86::VPMAXUBrm },
- { X86::VPMAXUBZ128rr , X86::VPMAXUBrr },
- { X86::VPMAXUDZ128rm , X86::VPMAXUDrm },
- { X86::VPMAXUDZ128rr , X86::VPMAXUDrr },
- { X86::VPMAXUWZ128rm , X86::VPMAXUWrm },
- { X86::VPMAXUWZ128rr , X86::VPMAXUWrr },
- { X86::VPMINSBZ128rm , X86::VPMINSBrm },
- { X86::VPMINSBZ128rr , X86::VPMINSBrr },
- { X86::VPMINSDZ128rm , X86::VPMINSDrm },
- { X86::VPMINSDZ128rr , X86::VPMINSDrr },
- { X86::VPMINSWZ128rm , X86::VPMINSWrm },
- { X86::VPMINSWZ128rr , X86::VPMINSWrr },
- { X86::VPMINUBZ128rm , X86::VPMINUBrm },
- { X86::VPMINUBZ128rr , X86::VPMINUBrr },
- { X86::VPMINUDZ128rm , X86::VPMINUDrm },
- { X86::VPMINUDZ128rr , X86::VPMINUDrr },
- { X86::VPMINUWZ128rm , X86::VPMINUWrm },
- { X86::VPMINUWZ128rr , X86::VPMINUWrr },
- { X86::VPMOVSXBDZ128rm , X86::VPMOVSXBDrm },
- { X86::VPMOVSXBDZ128rr , X86::VPMOVSXBDrr },
- { X86::VPMOVSXBQZ128rm , X86::VPMOVSXBQrm },
- { X86::VPMOVSXBQZ128rr , X86::VPMOVSXBQrr },
- { X86::VPMOVSXBWZ128rm , X86::VPMOVSXBWrm },
- { X86::VPMOVSXBWZ128rr , X86::VPMOVSXBWrr },
- { X86::VPMOVSXDQZ128rm , X86::VPMOVSXDQrm },
- { X86::VPMOVSXDQZ128rr , X86::VPMOVSXDQrr },
- { X86::VPMOVSXWDZ128rm , X86::VPMOVSXWDrm },
- { X86::VPMOVSXWDZ128rr , X86::VPMOVSXWDrr },
- { X86::VPMOVSXWQZ128rm , X86::VPMOVSXWQrm },
- { X86::VPMOVSXWQZ128rr , X86::VPMOVSXWQrr },
- { X86::VPMOVZXBDZ128rm , X86::VPMOVZXBDrm },
- { X86::VPMOVZXBDZ128rr , X86::VPMOVZXBDrr },
- { X86::VPMOVZXBQZ128rm , X86::VPMOVZXBQrm },
- { X86::VPMOVZXBQZ128rr , X86::VPMOVZXBQrr },
- { X86::VPMOVZXBWZ128rm , X86::VPMOVZXBWrm },
- { X86::VPMOVZXBWZ128rr , X86::VPMOVZXBWrr },
- { X86::VPMOVZXDQZ128rm , X86::VPMOVZXDQrm },
- { X86::VPMOVZXDQZ128rr , X86::VPMOVZXDQrr },
- { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm },
- { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr },
- { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm },
- { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
- { X86::VPMULDQZ128rm , X86::VPMULDQrm },
- { X86::VPMULDQZ128rr , X86::VPMULDQrr },
- { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm },
- { X86::VPMULHRSWZ128rr , X86::VPMULHRSWrr },
- { X86::VPMULHUWZ128rm , X86::VPMULHUWrm },
- { X86::VPMULHUWZ128rr , X86::VPMULHUWrr },
- { X86::VPMULHWZ128rm , X86::VPMULHWrm },
- { X86::VPMULHWZ128rr , X86::VPMULHWrr },
- { X86::VPMULLDZ128rm , X86::VPMULLDrm },
- { X86::VPMULLDZ128rr , X86::VPMULLDrr },
- { X86::VPMULLWZ128rm , X86::VPMULLWrm },
- { X86::VPMULLWZ128rr , X86::VPMULLWrr },
- { X86::VPMULUDQZ128rm , X86::VPMULUDQrm },
- { X86::VPMULUDQZ128rr , X86::VPMULUDQrr },
- { X86::VPORDZ128rm , X86::VPORrm },
- { X86::VPORDZ128rr , X86::VPORrr },
- { X86::VPORQZ128rm , X86::VPORrm },
- { X86::VPORQZ128rr , X86::VPORrr },
- { X86::VPSADBWZ128rm , X86::VPSADBWrm },
- { X86::VPSADBWZ128rr , X86::VPSADBWrr },
- { X86::VPSHUFBZ128rm , X86::VPSHUFBrm },
- { X86::VPSHUFBZ128rr , X86::VPSHUFBrr },
- { X86::VPSHUFDZ128mi , X86::VPSHUFDmi },
- { X86::VPSHUFDZ128ri , X86::VPSHUFDri },
- { X86::VPSHUFHWZ128mi , X86::VPSHUFHWmi },
- { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri },
- { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi },
- { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri },
- { X86::VPSLLDQZ128rr , X86::VPSLLDQri },
- { X86::VPSLLDZ128ri , X86::VPSLLDri },
- { X86::VPSLLDZ128rm , X86::VPSLLDrm },
- { X86::VPSLLDZ128rr , X86::VPSLLDrr },
- { X86::VPSLLQZ128ri , X86::VPSLLQri },
- { X86::VPSLLQZ128rm , X86::VPSLLQrm },
- { X86::VPSLLQZ128rr , X86::VPSLLQrr },
- { X86::VPSLLVDZ128rm , X86::VPSLLVDrm },
- { X86::VPSLLVDZ128rr , X86::VPSLLVDrr },
- { X86::VPSLLVQZ128rm , X86::VPSLLVQrm },
- { X86::VPSLLVQZ128rr , X86::VPSLLVQrr },
- { X86::VPSLLWZ128ri , X86::VPSLLWri },
- { X86::VPSLLWZ128rm , X86::VPSLLWrm },
- { X86::VPSLLWZ128rr , X86::VPSLLWrr },
- { X86::VPSRADZ128ri , X86::VPSRADri },
- { X86::VPSRADZ128rm , X86::VPSRADrm },
- { X86::VPSRADZ128rr , X86::VPSRADrr },
- { X86::VPSRAVDZ128rm , X86::VPSRAVDrm },
- { X86::VPSRAVDZ128rr , X86::VPSRAVDrr },
- { X86::VPSRAWZ128ri , X86::VPSRAWri },
- { X86::VPSRAWZ128rm , X86::VPSRAWrm },
- { X86::VPSRAWZ128rr , X86::VPSRAWrr },
- { X86::VPSRLDQZ128rr , X86::VPSRLDQri },
- { X86::VPSRLDZ128ri , X86::VPSRLDri },
- { X86::VPSRLDZ128rm , X86::VPSRLDrm },
- { X86::VPSRLDZ128rr , X86::VPSRLDrr },
- { X86::VPSRLQZ128ri , X86::VPSRLQri },
- { X86::VPSRLQZ128rm , X86::VPSRLQrm },
- { X86::VPSRLQZ128rr , X86::VPSRLQrr },
- { X86::VPSRLVDZ128rm , X86::VPSRLVDrm },
- { X86::VPSRLVDZ128rr , X86::VPSRLVDrr },
- { X86::VPSRLVQZ128rm , X86::VPSRLVQrm },
- { X86::VPSRLVQZ128rr , X86::VPSRLVQrr },
- { X86::VPSRLWZ128ri , X86::VPSRLWri },
- { X86::VPSRLWZ128rm , X86::VPSRLWrm },
- { X86::VPSRLWZ128rr , X86::VPSRLWrr },
- { X86::VPSUBBZ128rm , X86::VPSUBBrm },
- { X86::VPSUBBZ128rr , X86::VPSUBBrr },
- { X86::VPSUBDZ128rm , X86::VPSUBDrm },
- { X86::VPSUBDZ128rr , X86::VPSUBDrr },
- { X86::VPSUBQZ128rm , X86::VPSUBQrm },
- { X86::VPSUBQZ128rr , X86::VPSUBQrr },
- { X86::VPSUBSBZ128rm , X86::VPSUBSBrm },
- { X86::VPSUBSBZ128rr , X86::VPSUBSBrr },
- { X86::VPSUBSWZ128rm , X86::VPSUBSWrm },
- { X86::VPSUBSWZ128rr , X86::VPSUBSWrr },
- { X86::VPSUBUSBZ128rm , X86::VPSUBUSBrm },
- { X86::VPSUBUSBZ128rr , X86::VPSUBUSBrr },
- { X86::VPSUBUSWZ128rm , X86::VPSUBUSWrm },
- { X86::VPSUBUSWZ128rr , X86::VPSUBUSWrr },
- { X86::VPSUBWZ128rm , X86::VPSUBWrm },
- { X86::VPSUBWZ128rr , X86::VPSUBWrr },
- { X86::VPUNPCKHBWZ128rm , X86::VPUNPCKHBWrm },
- { X86::VPUNPCKHBWZ128rr , X86::VPUNPCKHBWrr },
- { X86::VPUNPCKHDQZ128rm , X86::VPUNPCKHDQrm },
- { X86::VPUNPCKHDQZ128rr , X86::VPUNPCKHDQrr },
- { X86::VPUNPCKHQDQZ128rm , X86::VPUNPCKHQDQrm },
- { X86::VPUNPCKHQDQZ128rr , X86::VPUNPCKHQDQrr },
- { X86::VPUNPCKHWDZ128rm , X86::VPUNPCKHWDrm },
- { X86::VPUNPCKHWDZ128rr , X86::VPUNPCKHWDrr },
- { X86::VPUNPCKLBWZ128rm , X86::VPUNPCKLBWrm },
- { X86::VPUNPCKLBWZ128rr , X86::VPUNPCKLBWrr },
- { X86::VPUNPCKLDQZ128rm , X86::VPUNPCKLDQrm },
- { X86::VPUNPCKLDQZ128rr , X86::VPUNPCKLDQrr },
- { X86::VPUNPCKLQDQZ128rm , X86::VPUNPCKLQDQrm },
- { X86::VPUNPCKLQDQZ128rr , X86::VPUNPCKLQDQrr },
- { X86::VPUNPCKLWDZ128rm , X86::VPUNPCKLWDrm },
- { X86::VPUNPCKLWDZ128rr , X86::VPUNPCKLWDrr },
- { X86::VPXORDZ128rm , X86::VPXORrm },
- { X86::VPXORDZ128rr , X86::VPXORrr },
- { X86::VPXORQZ128rm , X86::VPXORrm },
- { X86::VPXORQZ128rr , X86::VPXORrr },
- { X86::VSHUFPDZ128rmi , X86::VSHUFPDrmi },
- { X86::VSHUFPDZ128rri , X86::VSHUFPDrri },
- { X86::VSHUFPSZ128rmi , X86::VSHUFPSrmi },
- { X86::VSHUFPSZ128rri , X86::VSHUFPSrri },
- { X86::VSQRTPDZ128m , X86::VSQRTPDm },
- { X86::VSQRTPDZ128r , X86::VSQRTPDr },
- { X86::VSQRTPSZ128m , X86::VSQRTPSm },
- { X86::VSQRTPSZ128r , X86::VSQRTPSr },
- { X86::VSUBPDZ128rm , X86::VSUBPDrm },
- { X86::VSUBPDZ128rr , X86::VSUBPDrr },
- { X86::VSUBPSZ128rm , X86::VSUBPSrm },
- { X86::VSUBPSZ128rr , X86::VSUBPSrr },
- { X86::VUNPCKHPDZ128rm , X86::VUNPCKHPDrm },
- { X86::VUNPCKHPDZ128rr , X86::VUNPCKHPDrr },
- { X86::VUNPCKHPSZ128rm , X86::VUNPCKHPSrm },
- { X86::VUNPCKHPSZ128rr , X86::VUNPCKHPSrr },
- { X86::VUNPCKLPDZ128rm , X86::VUNPCKLPDrm },
- { X86::VUNPCKLPDZ128rr , X86::VUNPCKLPDrr },
- { X86::VUNPCKLPSZ128rm , X86::VUNPCKLPSrm },
- { X86::VUNPCKLPSZ128rr , X86::VUNPCKLPSrr },
- { X86::VXORPDZ128rm , X86::VXORPDrm },
- { X86::VXORPDZ128rr , X86::VXORPDrr },
- { X86::VXORPSZ128rm , X86::VXORPSrm },
- { X86::VXORPSZ128rr , X86::VXORPSrr },
-};
-
-
-// X86 EVEX encoded instructions that have a VEX 256 encoding
-// (table format: <EVEX opcode, VEX-256 opcode>).
- static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
- { X86::VADDPDZ256rm , X86::VADDPDYrm },
- { X86::VADDPDZ256rr , X86::VADDPDYrr },
- { X86::VADDPSZ256rm , X86::VADDPSYrm },
- { X86::VADDPSZ256rr , X86::VADDPSYrr },
- { X86::VANDNPDZ256rm , X86::VANDNPDYrm },
- { X86::VANDNPDZ256rr , X86::VANDNPDYrr },
- { X86::VANDNPSZ256rm , X86::VANDNPSYrm },
- { X86::VANDNPSZ256rr , X86::VANDNPSYrr },
- { X86::VANDPDZ256rm , X86::VANDPDYrm },
- { X86::VANDPDZ256rr , X86::VANDPDYrr },
- { X86::VANDPSZ256rm , X86::VANDPSYrm },
- { X86::VANDPSZ256rr , X86::VANDPSYrr },
- { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
- { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
- { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
- { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
- { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
- { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
- { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
- { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
- { X86::VCVTDQ2PSZ256rm , X86::VCVTDQ2PSYrm },
- { X86::VCVTDQ2PSZ256rr , X86::VCVTDQ2PSYrr },
- { X86::VCVTPD2DQZ256rm , X86::VCVTPD2DQYrm },
- { X86::VCVTPD2DQZ256rr , X86::VCVTPD2DQYrr },
- { X86::VCVTPD2PSZ256rm , X86::VCVTPD2PSYrm },
- { X86::VCVTPD2PSZ256rr , X86::VCVTPD2PSYrr },
- { X86::VCVTPH2PSZ256rm , X86::VCVTPH2PSYrm },
- { X86::VCVTPH2PSZ256rr , X86::VCVTPH2PSYrr },
- { X86::VCVTPS2DQZ256rm , X86::VCVTPS2DQYrm },
- { X86::VCVTPS2DQZ256rr , X86::VCVTPS2DQYrr },
- { X86::VCVTPS2PDZ256rm , X86::VCVTPS2PDYrm },
- { X86::VCVTPS2PDZ256rr , X86::VCVTPS2PDYrr },
- { X86::VCVTPS2PHZ256mr , X86::VCVTPS2PHYmr },
- { X86::VCVTPS2PHZ256rr , X86::VCVTPS2PHYrr },
- { X86::VCVTTPD2DQZ256rm , X86::VCVTTPD2DQYrm },
- { X86::VCVTTPD2DQZ256rr , X86::VCVTTPD2DQYrr },
- { X86::VCVTTPS2DQZ256rm , X86::VCVTTPS2DQYrm },
- { X86::VCVTTPS2DQZ256rr , X86::VCVTTPS2DQYrr },
- { X86::VDIVPDZ256rm , X86::VDIVPDYrm },
- { X86::VDIVPDZ256rr , X86::VDIVPDYrr },
- { X86::VDIVPSZ256rm , X86::VDIVPSYrm },
- { X86::VDIVPSZ256rr , X86::VDIVPSYrr },
- { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
- { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
- { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
- { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
- { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
- { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
- { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
- { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
- { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
- { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
- { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
- { X86::VFMADD132PSZ256r , X86::VFMADD132PSYr },
- { X86::VFMADD213PDZ256m , X86::VFMADD213PDYm },
- { X86::VFMADD213PDZ256r , X86::VFMADD213PDYr },
- { X86::VFMADD213PSZ256m , X86::VFMADD213PSYm },
- { X86::VFMADD213PSZ256r , X86::VFMADD213PSYr },
- { X86::VFMADD231PDZ256m , X86::VFMADD231PDYm },
- { X86::VFMADD231PDZ256r , X86::VFMADD231PDYr },
- { X86::VFMADD231PSZ256m , X86::VFMADD231PSYm },
- { X86::VFMADD231PSZ256r , X86::VFMADD231PSYr },
- { X86::VFMADDSUB132PDZ256m , X86::VFMADDSUB132PDYm },
- { X86::VFMADDSUB132PDZ256r , X86::VFMADDSUB132PDYr },
- { X86::VFMADDSUB132PSZ256m , X86::VFMADDSUB132PSYm },
- { X86::VFMADDSUB132PSZ256r , X86::VFMADDSUB132PSYr },
- { X86::VFMADDSUB213PDZ256m , X86::VFMADDSUB213PDYm },
- { X86::VFMADDSUB213PDZ256r , X86::VFMADDSUB213PDYr },
- { X86::VFMADDSUB213PSZ256m , X86::VFMADDSUB213PSYm },
- { X86::VFMADDSUB213PSZ256r , X86::VFMADDSUB213PSYr },
- { X86::VFMADDSUB231PDZ256m , X86::VFMADDSUB231PDYm },
- { X86::VFMADDSUB231PDZ256r , X86::VFMADDSUB231PDYr },
- { X86::VFMADDSUB231PSZ256m , X86::VFMADDSUB231PSYm },
- { X86::VFMADDSUB231PSZ256r , X86::VFMADDSUB231PSYr },
- { X86::VFMSUB132PDZ256m , X86::VFMSUB132PDYm },
- { X86::VFMSUB132PDZ256r , X86::VFMSUB132PDYr },
- { X86::VFMSUB132PSZ256m , X86::VFMSUB132PSYm },
- { X86::VFMSUB132PSZ256r , X86::VFMSUB132PSYr },
- { X86::VFMSUB213PDZ256m , X86::VFMSUB213PDYm },
- { X86::VFMSUB213PDZ256r , X86::VFMSUB213PDYr },
- { X86::VFMSUB213PSZ256m , X86::VFMSUB213PSYm },
- { X86::VFMSUB213PSZ256r , X86::VFMSUB213PSYr },
- { X86::VFMSUB231PDZ256m , X86::VFMSUB231PDYm },
- { X86::VFMSUB231PDZ256r , X86::VFMSUB231PDYr },
- { X86::VFMSUB231PSZ256m , X86::VFMSUB231PSYm },
- { X86::VFMSUB231PSZ256r , X86::VFMSUB231PSYr },
- { X86::VFMSUBADD132PDZ256m , X86::VFMSUBADD132PDYm },
- { X86::VFMSUBADD132PDZ256r , X86::VFMSUBADD132PDYr },
- { X86::VFMSUBADD132PSZ256m , X86::VFMSUBADD132PSYm },
- { X86::VFMSUBADD132PSZ256r , X86::VFMSUBADD132PSYr },
- { X86::VFMSUBADD213PDZ256m , X86::VFMSUBADD213PDYm },
- { X86::VFMSUBADD213PDZ256r , X86::VFMSUBADD213PDYr },
- { X86::VFMSUBADD213PSZ256m , X86::VFMSUBADD213PSYm },
- { X86::VFMSUBADD213PSZ256r , X86::VFMSUBADD213PSYr },
- { X86::VFMSUBADD231PDZ256m , X86::VFMSUBADD231PDYm },
- { X86::VFMSUBADD231PDZ256r , X86::VFMSUBADD231PDYr },
- { X86::VFMSUBADD231PSZ256m , X86::VFMSUBADD231PSYm },
- { X86::VFMSUBADD231PSZ256r , X86::VFMSUBADD231PSYr },
- { X86::VFNMADD132PDZ256m , X86::VFNMADD132PDYm },
- { X86::VFNMADD132PDZ256r , X86::VFNMADD132PDYr },
- { X86::VFNMADD132PSZ256m , X86::VFNMADD132PSYm },
- { X86::VFNMADD132PSZ256r , X86::VFNMADD132PSYr },
- { X86::VFNMADD213PDZ256m , X86::VFNMADD213PDYm },
- { X86::VFNMADD213PDZ256r , X86::VFNMADD213PDYr },
- { X86::VFNMADD213PSZ256m , X86::VFNMADD213PSYm },
- { X86::VFNMADD213PSZ256r , X86::VFNMADD213PSYr },
- { X86::VFNMADD231PDZ256m , X86::VFNMADD231PDYm },
- { X86::VFNMADD231PDZ256r , X86::VFNMADD231PDYr },
- { X86::VFNMADD231PSZ256m , X86::VFNMADD231PSYm },
- { X86::VFNMADD231PSZ256r , X86::VFNMADD231PSYr },
- { X86::VFNMSUB132PDZ256m , X86::VFNMSUB132PDYm },
- { X86::VFNMSUB132PDZ256r , X86::VFNMSUB132PDYr },
- { X86::VFNMSUB132PSZ256m , X86::VFNMSUB132PSYm },
- { X86::VFNMSUB132PSZ256r , X86::VFNMSUB132PSYr },
- { X86::VFNMSUB213PDZ256m , X86::VFNMSUB213PDYm },
- { X86::VFNMSUB213PDZ256r , X86::VFNMSUB213PDYr },
- { X86::VFNMSUB213PSZ256m , X86::VFNMSUB213PSYm },
- { X86::VFNMSUB213PSZ256r , X86::VFNMSUB213PSYr },
- { X86::VFNMSUB231PDZ256m , X86::VFNMSUB231PDYm },
- { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
- { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
- { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
- { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
- { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
- { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
- { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
- { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
- { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
- { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
- { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
- { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
- { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
- { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
- { X86::VMAXCPSZ256rr , X86::VMAXCPSYrr },
- { X86::VMAXPDZ256rm , X86::VMAXPDYrm },
- { X86::VMAXPDZ256rr , X86::VMAXPDYrr },
- { X86::VMAXPSZ256rm , X86::VMAXPSYrm },
- { X86::VMAXPSZ256rr , X86::VMAXPSYrr },
- { X86::VMINCPDZ256rm , X86::VMINCPDYrm },
- { X86::VMINCPDZ256rr , X86::VMINCPDYrr },
- { X86::VMINCPSZ256rm , X86::VMINCPSYrm },
- { X86::VMINCPSZ256rr , X86::VMINCPSYrr },
- { X86::VMINPDZ256rm , X86::VMINPDYrm },
- { X86::VMINPDZ256rr , X86::VMINPDYrr },
- { X86::VMINPSZ256rm , X86::VMINPSYrm },
- { X86::VMINPSZ256rr , X86::VMINPSYrr },
- { X86::VMOVAPDZ256mr , X86::VMOVAPDYmr },
- { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
- { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
- { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV },
- { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
- { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
- { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr },
- { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV },
- { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm },
- { X86::VMOVDDUPZ256rr , X86::VMOVDDUPYrr },
- { X86::VMOVDQA32Z256mr , X86::VMOVDQAYmr },
- { X86::VMOVDQA32Z256rm , X86::VMOVDQAYrm },
- { X86::VMOVDQA32Z256rr , X86::VMOVDQAYrr },
- { X86::VMOVDQA32Z256rr_REV , X86::VMOVDQAYrr_REV },
- { X86::VMOVDQA64Z256mr , X86::VMOVDQAYmr },
- { X86::VMOVDQA64Z256rm , X86::VMOVDQAYrm },
- { X86::VMOVDQA64Z256rr , X86::VMOVDQAYrr },
- { X86::VMOVDQA64Z256rr_REV , X86::VMOVDQAYrr_REV },
- { X86::VMOVDQU16Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU16Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU16Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU16Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVDQU32Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU32Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU32Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU32Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVDQU64Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU64Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU64Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU64Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVDQU8Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU8Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU8Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU8Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVNTDQAZ256rm , X86::VMOVNTDQAYrm },
- { X86::VMOVNTDQZ256mr , X86::VMOVNTDQYmr },
- { X86::VMOVNTPDZ256mr , X86::VMOVNTPDYmr },
- { X86::VMOVNTPSZ256mr , X86::VMOVNTPSYmr },
- { X86::VMOVSHDUPZ256rm , X86::VMOVSHDUPYrm },
- { X86::VMOVSHDUPZ256rr , X86::VMOVSHDUPYrr },
- { X86::VMOVSLDUPZ256rm , X86::VMOVSLDUPYrm },
- { X86::VMOVSLDUPZ256rr , X86::VMOVSLDUPYrr },
- { X86::VMOVUPDZ256mr , X86::VMOVUPDYmr },
- { X86::VMOVUPDZ256rm , X86::VMOVUPDYrm },
- { X86::VMOVUPDZ256rr , X86::VMOVUPDYrr },
- { X86::VMOVUPDZ256rr_REV , X86::VMOVUPDYrr_REV },
- { X86::VMOVUPSZ256mr , X86::VMOVUPSYmr },
- { X86::VMOVUPSZ256rm , X86::VMOVUPSYrm },
- { X86::VMOVUPSZ256rr , X86::VMOVUPSYrr },
- { X86::VMOVUPSZ256rr_REV , X86::VMOVUPSYrr_REV },
- { X86::VMULPDZ256rm , X86::VMULPDYrm },
- { X86::VMULPDZ256rr , X86::VMULPDYrr },
- { X86::VMULPSZ256rm , X86::VMULPSYrm },
- { X86::VMULPSZ256rr , X86::VMULPSYrr },
- { X86::VORPDZ256rm , X86::VORPDYrm },
- { X86::VORPDZ256rr , X86::VORPDYrr },
- { X86::VORPSZ256rm , X86::VORPSYrm },
- { X86::VORPSZ256rr , X86::VORPSYrr },
- { X86::VPABSBZ256rm , X86::VPABSBYrm },
- { X86::VPABSBZ256rr , X86::VPABSBYrr },
- { X86::VPABSDZ256rm , X86::VPABSDYrm },
- { X86::VPABSDZ256rr , X86::VPABSDYrr },
- { X86::VPABSWZ256rm , X86::VPABSWYrm },
- { X86::VPABSWZ256rr , X86::VPABSWYrr },
- { X86::VPACKSSDWZ256rm , X86::VPACKSSDWYrm },
- { X86::VPACKSSDWZ256rr , X86::VPACKSSDWYrr },
- { X86::VPACKSSWBZ256rm , X86::VPACKSSWBYrm },
- { X86::VPACKSSWBZ256rr , X86::VPACKSSWBYrr },
- { X86::VPACKUSDWZ256rm , X86::VPACKUSDWYrm },
- { X86::VPACKUSDWZ256rr , X86::VPACKUSDWYrr },
- { X86::VPACKUSWBZ256rm , X86::VPACKUSWBYrm },
- { X86::VPACKUSWBZ256rr , X86::VPACKUSWBYrr },
- { X86::VPADDBZ256rm , X86::VPADDBYrm },
- { X86::VPADDBZ256rr , X86::VPADDBYrr },
- { X86::VPADDDZ256rm , X86::VPADDDYrm },
- { X86::VPADDDZ256rr , X86::VPADDDYrr },
- { X86::VPADDQZ256rm , X86::VPADDQYrm },
- { X86::VPADDQZ256rr , X86::VPADDQYrr },
- { X86::VPADDSBZ256rm , X86::VPADDSBYrm },
- { X86::VPADDSBZ256rr , X86::VPADDSBYrr },
- { X86::VPADDSWZ256rm , X86::VPADDSWYrm },
- { X86::VPADDSWZ256rr , X86::VPADDSWYrr },
- { X86::VPADDUSBZ256rm , X86::VPADDUSBYrm },
- { X86::VPADDUSBZ256rr , X86::VPADDUSBYrr },
- { X86::VPADDUSWZ256rm , X86::VPADDUSWYrm },
- { X86::VPADDUSWZ256rr , X86::VPADDUSWYrr },
- { X86::VPADDWZ256rm , X86::VPADDWYrm },
- { X86::VPADDWZ256rr , X86::VPADDWYrr },
- { X86::VPALIGNRZ256rmi , X86::VPALIGNRYrmi },
- { X86::VPALIGNRZ256rri , X86::VPALIGNRYrri },
- { X86::VPANDDZ256rm , X86::VPANDYrm },
- { X86::VPANDDZ256rr , X86::VPANDYrr },
- { X86::VPANDQZ256rm , X86::VPANDYrm },
- { X86::VPANDQZ256rr , X86::VPANDYrr },
- { X86::VPAVGBZ256rm , X86::VPAVGBYrm },
- { X86::VPAVGBZ256rr , X86::VPAVGBYrr },
- { X86::VPAVGWZ256rm , X86::VPAVGWYrm },
- { X86::VPAVGWZ256rr , X86::VPAVGWYrr },
- { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
- { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
- { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
- { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
- { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
- { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
- { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
- { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
- { X86::VPERMDZ256rm , X86::VPERMDYrm },
- { X86::VPERMDZ256rr , X86::VPERMDYrr },
- { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi },
- { X86::VPERMILPDZ256ri , X86::VPERMILPDYri },
- { X86::VPERMILPDZ256rm , X86::VPERMILPDYrm },
- { X86::VPERMILPDZ256rr , X86::VPERMILPDYrr },
- { X86::VPERMILPSZ256mi , X86::VPERMILPSYmi },
- { X86::VPERMILPSZ256ri , X86::VPERMILPSYri },
- { X86::VPERMILPSZ256rm , X86::VPERMILPSYrm },
- { X86::VPERMILPSZ256rr , X86::VPERMILPSYrr },
- { X86::VPERMPDZ256mi , X86::VPERMPDYmi },
- { X86::VPERMPDZ256ri , X86::VPERMPDYri },
- { X86::VPERMPSZ256rm , X86::VPERMPSYrm },
- { X86::VPERMPSZ256rr , X86::VPERMPSYrr },
- { X86::VPERMQZ256mi , X86::VPERMQYmi },
- { X86::VPERMQZ256ri , X86::VPERMQYri },
- { X86::VPMADDUBSWZ256rm , X86::VPMADDUBSWYrm },
- { X86::VPMADDUBSWZ256rr , X86::VPMADDUBSWYrr },
- { X86::VPMADDWDZ256rm , X86::VPMADDWDYrm },
- { X86::VPMADDWDZ256rr , X86::VPMADDWDYrr },
- { X86::VPMAXSBZ256rm , X86::VPMAXSBYrm },
- { X86::VPMAXSBZ256rr , X86::VPMAXSBYrr },
- { X86::VPMAXSDZ256rm , X86::VPMAXSDYrm },
- { X86::VPMAXSDZ256rr , X86::VPMAXSDYrr },
- { X86::VPMAXSWZ256rm , X86::VPMAXSWYrm },
- { X86::VPMAXSWZ256rr , X86::VPMAXSWYrr },
- { X86::VPMAXUBZ256rm , X86::VPMAXUBYrm },
- { X86::VPMAXUBZ256rr , X86::VPMAXUBYrr },
- { X86::VPMAXUDZ256rm , X86::VPMAXUDYrm },
- { X86::VPMAXUDZ256rr , X86::VPMAXUDYrr },
- { X86::VPMAXUWZ256rm , X86::VPMAXUWYrm },
- { X86::VPMAXUWZ256rr , X86::VPMAXUWYrr },
- { X86::VPMINSBZ256rm , X86::VPMINSBYrm },
- { X86::VPMINSBZ256rr , X86::VPMINSBYrr },
- { X86::VPMINSDZ256rm , X86::VPMINSDYrm },
- { X86::VPMINSDZ256rr , X86::VPMINSDYrr },
- { X86::VPMINSWZ256rm , X86::VPMINSWYrm },
- { X86::VPMINSWZ256rr , X86::VPMINSWYrr },
- { X86::VPMINUBZ256rm , X86::VPMINUBYrm },
- { X86::VPMINUBZ256rr , X86::VPMINUBYrr },
- { X86::VPMINUDZ256rm , X86::VPMINUDYrm },
- { X86::VPMINUDZ256rr , X86::VPMINUDYrr },
- { X86::VPMINUWZ256rm , X86::VPMINUWYrm },
- { X86::VPMINUWZ256rr , X86::VPMINUWYrr },
- { X86::VPMOVSXBDZ256rm , X86::VPMOVSXBDYrm },
- { X86::VPMOVSXBDZ256rr , X86::VPMOVSXBDYrr },
- { X86::VPMOVSXBQZ256rm , X86::VPMOVSXBQYrm },
- { X86::VPMOVSXBQZ256rr , X86::VPMOVSXBQYrr },
- { X86::VPMOVSXBWZ256rm , X86::VPMOVSXBWYrm },
- { X86::VPMOVSXBWZ256rr , X86::VPMOVSXBWYrr },
- { X86::VPMOVSXDQZ256rm , X86::VPMOVSXDQYrm },
- { X86::VPMOVSXDQZ256rr , X86::VPMOVSXDQYrr },
- { X86::VPMOVSXWDZ256rm , X86::VPMOVSXWDYrm },
- { X86::VPMOVSXWDZ256rr , X86::VPMOVSXWDYrr },
- { X86::VPMOVSXWQZ256rm , X86::VPMOVSXWQYrm },
- { X86::VPMOVSXWQZ256rr , X86::VPMOVSXWQYrr },
- { X86::VPMOVZXBDZ256rm , X86::VPMOVZXBDYrm },
- { X86::VPMOVZXBDZ256rr , X86::VPMOVZXBDYrr },
- { X86::VPMOVZXBQZ256rm , X86::VPMOVZXBQYrm },
- { X86::VPMOVZXBQZ256rr , X86::VPMOVZXBQYrr },
- { X86::VPMOVZXBWZ256rm , X86::VPMOVZXBWYrm },
- { X86::VPMOVZXBWZ256rr , X86::VPMOVZXBWYrr },
- { X86::VPMOVZXDQZ256rm , X86::VPMOVZXDQYrm },
- { X86::VPMOVZXDQZ256rr , X86::VPMOVZXDQYrr },
- { X86::VPMOVZXWDZ256rm , X86::VPMOVZXWDYrm },
- { X86::VPMOVZXWDZ256rr , X86::VPMOVZXWDYrr },
- { X86::VPMOVZXWQZ256rm , X86::VPMOVZXWQYrm },
- { X86::VPMOVZXWQZ256rr , X86::VPMOVZXWQYrr },
- { X86::VPMULDQZ256rm , X86::VPMULDQYrm },
- { X86::VPMULDQZ256rr , X86::VPMULDQYrr },
- { X86::VPMULHRSWZ256rm , X86::VPMULHRSWYrm },
- { X86::VPMULHRSWZ256rr , X86::VPMULHRSWYrr },
- { X86::VPMULHUWZ256rm , X86::VPMULHUWYrm },
- { X86::VPMULHUWZ256rr , X86::VPMULHUWYrr },
- { X86::VPMULHWZ256rm , X86::VPMULHWYrm },
- { X86::VPMULHWZ256rr , X86::VPMULHWYrr },
- { X86::VPMULLDZ256rm , X86::VPMULLDYrm },
- { X86::VPMULLDZ256rr , X86::VPMULLDYrr },
- { X86::VPMULLWZ256rm , X86::VPMULLWYrm },
- { X86::VPMULLWZ256rr , X86::VPMULLWYrr },
- { X86::VPMULUDQZ256rm , X86::VPMULUDQYrm },
- { X86::VPMULUDQZ256rr , X86::VPMULUDQYrr },
- { X86::VPORDZ256rm , X86::VPORYrm },
- { X86::VPORDZ256rr , X86::VPORYrr },
- { X86::VPORQZ256rm , X86::VPORYrm },
- { X86::VPORQZ256rr , X86::VPORYrr },
- { X86::VPSADBWZ256rm , X86::VPSADBWYrm },
- { X86::VPSADBWZ256rr , X86::VPSADBWYrr },
- { X86::VPSHUFBZ256rm , X86::VPSHUFBYrm },
- { X86::VPSHUFBZ256rr , X86::VPSHUFBYrr },
- { X86::VPSHUFDZ256mi , X86::VPSHUFDYmi },
- { X86::VPSHUFDZ256ri , X86::VPSHUFDYri },
- { X86::VPSHUFHWZ256mi , X86::VPSHUFHWYmi },
- { X86::VPSHUFHWZ256ri , X86::VPSHUFHWYri },
- { X86::VPSHUFLWZ256mi , X86::VPSHUFLWYmi },
- { X86::VPSHUFLWZ256ri , X86::VPSHUFLWYri },
- { X86::VPSLLDQZ256rr , X86::VPSLLDQYri },
- { X86::VPSLLDZ256ri , X86::VPSLLDYri },
- { X86::VPSLLDZ256rm , X86::VPSLLDYrm },
- { X86::VPSLLDZ256rr , X86::VPSLLDYrr },
- { X86::VPSLLQZ256ri , X86::VPSLLQYri },
- { X86::VPSLLQZ256rm , X86::VPSLLQYrm },
- { X86::VPSLLQZ256rr , X86::VPSLLQYrr },
- { X86::VPSLLVDZ256rm , X86::VPSLLVDYrm },
- { X86::VPSLLVDZ256rr , X86::VPSLLVDYrr },
- { X86::VPSLLVQZ256rm , X86::VPSLLVQYrm },
- { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr },
- { X86::VPSLLWZ256ri , X86::VPSLLWYri },
- { X86::VPSLLWZ256rm , X86::VPSLLWYrm },
- { X86::VPSLLWZ256rr , X86::VPSLLWYrr },
- { X86::VPSRADZ256ri , X86::VPSRADYri },
- { X86::VPSRADZ256rm , X86::VPSRADYrm },
- { X86::VPSRADZ256rr , X86::VPSRADYrr },
- { X86::VPSRAVDZ256rm , X86::VPSRAVDYrm },
- { X86::VPSRAVDZ256rr , X86::VPSRAVDYrr },
- { X86::VPSRAWZ256ri , X86::VPSRAWYri },
- { X86::VPSRAWZ256rm , X86::VPSRAWYrm },
- { X86::VPSRAWZ256rr , X86::VPSRAWYrr },
- { X86::VPSRLDQZ256rr , X86::VPSRLDQYri },
- { X86::VPSRLDZ256ri , X86::VPSRLDYri },
- { X86::VPSRLDZ256rm , X86::VPSRLDYrm },
- { X86::VPSRLDZ256rr , X86::VPSRLDYrr },
- { X86::VPSRLQZ256ri , X86::VPSRLQYri },
- { X86::VPSRLQZ256rm , X86::VPSRLQYrm },
- { X86::VPSRLQZ256rr , X86::VPSRLQYrr },
- { X86::VPSRLVDZ256rm , X86::VPSRLVDYrm },
- { X86::VPSRLVDZ256rr , X86::VPSRLVDYrr },
- { X86::VPSRLVQZ256rm , X86::VPSRLVQYrm },
- { X86::VPSRLVQZ256rr , X86::VPSRLVQYrr },
- { X86::VPSRLWZ256ri , X86::VPSRLWYri },
- { X86::VPSRLWZ256rm , X86::VPSRLWYrm },
- { X86::VPSRLWZ256rr , X86::VPSRLWYrr },
- { X86::VPSUBBZ256rm , X86::VPSUBBYrm },
- { X86::VPSUBBZ256rr , X86::VPSUBBYrr },
- { X86::VPSUBDZ256rm , X86::VPSUBDYrm },
- { X86::VPSUBDZ256rr , X86::VPSUBDYrr },
- { X86::VPSUBQZ256rm , X86::VPSUBQYrm },
- { X86::VPSUBQZ256rr , X86::VPSUBQYrr },
- { X86::VPSUBSBZ256rm , X86::VPSUBSBYrm },
- { X86::VPSUBSBZ256rr , X86::VPSUBSBYrr },
- { X86::VPSUBSWZ256rm , X86::VPSUBSWYrm },
- { X86::VPSUBSWZ256rr , X86::VPSUBSWYrr },
- { X86::VPSUBUSBZ256rm , X86::VPSUBUSBYrm },
- { X86::VPSUBUSBZ256rr , X86::VPSUBUSBYrr },
- { X86::VPSUBUSWZ256rm , X86::VPSUBUSWYrm },
- { X86::VPSUBUSWZ256rr , X86::VPSUBUSWYrr },
- { X86::VPSUBWZ256rm , X86::VPSUBWYrm },
- { X86::VPSUBWZ256rr , X86::VPSUBWYrr },
- { X86::VPUNPCKHBWZ256rm , X86::VPUNPCKHBWYrm },
- { X86::VPUNPCKHBWZ256rr , X86::VPUNPCKHBWYrr },
- { X86::VPUNPCKHDQZ256rm , X86::VPUNPCKHDQYrm },
- { X86::VPUNPCKHDQZ256rr , X86::VPUNPCKHDQYrr },
- { X86::VPUNPCKHQDQZ256rm , X86::VPUNPCKHQDQYrm },
- { X86::VPUNPCKHQDQZ256rr , X86::VPUNPCKHQDQYrr },
- { X86::VPUNPCKHWDZ256rm , X86::VPUNPCKHWDYrm },
- { X86::VPUNPCKHWDZ256rr , X86::VPUNPCKHWDYrr },
- { X86::VPUNPCKLBWZ256rm , X86::VPUNPCKLBWYrm },
- { X86::VPUNPCKLBWZ256rr , X86::VPUNPCKLBWYrr },
- { X86::VPUNPCKLDQZ256rm , X86::VPUNPCKLDQYrm },
- { X86::VPUNPCKLDQZ256rr , X86::VPUNPCKLDQYrr },
- { X86::VPUNPCKLQDQZ256rm , X86::VPUNPCKLQDQYrm },
- { X86::VPUNPCKLQDQZ256rr , X86::VPUNPCKLQDQYrr },
- { X86::VPUNPCKLWDZ256rm , X86::VPUNPCKLWDYrm },
- { X86::VPUNPCKLWDZ256rr , X86::VPUNPCKLWDYrr },
- { X86::VPXORDZ256rm , X86::VPXORYrm },
- { X86::VPXORDZ256rr , X86::VPXORYrr },
- { X86::VPXORQZ256rm , X86::VPXORYrm },
- { X86::VPXORQZ256rr , X86::VPXORYrr },
- { X86::VSHUFPDZ256rmi , X86::VSHUFPDYrmi },
- { X86::VSHUFPDZ256rri , X86::VSHUFPDYrri },
- { X86::VSHUFPSZ256rmi , X86::VSHUFPSYrmi },
- { X86::VSHUFPSZ256rri , X86::VSHUFPSYrri },
- { X86::VSQRTPDZ256m , X86::VSQRTPDYm },
- { X86::VSQRTPDZ256r , X86::VSQRTPDYr },
- { X86::VSQRTPSZ256m , X86::VSQRTPSYm },
- { X86::VSQRTPSZ256r , X86::VSQRTPSYr },
- { X86::VSUBPDZ256rm , X86::VSUBPDYrm },
- { X86::VSUBPDZ256rr , X86::VSUBPDYrr },
- { X86::VSUBPSZ256rm , X86::VSUBPSYrm },
- { X86::VSUBPSZ256rr , X86::VSUBPSYrr },
- { X86::VUNPCKHPDZ256rm , X86::VUNPCKHPDYrm },
- { X86::VUNPCKHPDZ256rr , X86::VUNPCKHPDYrr },
- { X86::VUNPCKHPSZ256rm , X86::VUNPCKHPSYrm },
- { X86::VUNPCKHPSZ256rr , X86::VUNPCKHPSYrr },
- { X86::VUNPCKLPDZ256rm , X86::VUNPCKLPDYrm },
- { X86::VUNPCKLPDZ256rr , X86::VUNPCKLPDYrr },
- { X86::VUNPCKLPSZ256rm , X86::VUNPCKLPSYrm },
- { X86::VUNPCKLPSZ256rr , X86::VUNPCKLPSYrr },
- { X86::VXORPDZ256rm , X86::VXORPDYrm },
- { X86::VXORPDZ256rr , X86::VXORPDYrr },
- { X86::VXORPSZ256rm , X86::VXORPSYrm },
- { X86::VXORPSZ256rr , X86::VXORPSYrr },
-};
-
-#endif
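
The two tables removed above pair each EVEX-encoded opcode with the shorter VEX or VEX.256 encoding it can be compressed to. As a rough sketch of how such a table is typically consumed (assuming the entries stay sorted by EVEX opcode value, which the alphabetical ordering above gives in practice), a binary search like the following suffices; the struct and function names below are illustrative, not the pass's actual API:

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Illustrative stand-in for the removed table's entry type.
struct EvexToVexEntry {
  uint16_t EvexOpc; // EVEX-encoded instruction opcode
  uint16_t VexOpc;  // equivalent VEX / VEX.256 opcode
};

// Returns the VEX opcode for EvexOpc, or 0 if no compressed form exists.
// Assumes Table[0..Size) is sorted by EvexOpc.
static uint16_t lookupVexOpcode(const EvexToVexEntry *Table, size_t Size,
                                uint16_t EvexOpc) {
  const EvexToVexEntry *End = Table + Size;
  const EvexToVexEntry *I =
      std::lower_bound(Table, End, EvexOpc,
                       [](const EvexToVexEntry &E, uint16_t Opc) {
                         return E.EvexOpc < Opc;
                       });
  return (I != End && I->EvexOpc == EvexOpc) ? I->VexOpc : 0;
}
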
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 2ea27a934b47..315a69e6a2a2 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -43,22 +43,26 @@ def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmptrld\t$vmcs", []>, PS;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
"vmptrst\t$vmcs", []>, TB;
-def VMREAD64rm : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
-def VMREAD32rm : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
-def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+let mayStore = 1 in {
+def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+}
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
-def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+let mayLoad = 1 in {
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+}
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
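
The VMREAD/VMWRITE memory forms above have empty patterns, so TableGen cannot infer their memory behaviour; the new let mayStore/mayLoad blocks record it explicitly, and the memory-destination VMREADs are renamed to the usual *mr suffix. A minimal sketch of why this matters downstream, using only generic MachineInstr queries; the helper itself is hypothetical:

#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper: scheduling, alias analysis and the machine verifier
// all rely on these flags, which come straight from the MCInstrDesc bits
// that `let mayLoad = 1` / `let mayStore = 1` set for pattern-less
// instructions such as VMREAD64mr and VMWRITE64rm.
static bool touchesMemory(const llvm::MachineInstr &MI) {
  return MI.mayLoad() || MI.mayStore();
}
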
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 2b296e1e5b85..53224431c0e9 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -183,6 +183,27 @@ let ExeDomain = SSEPackedInt in {
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
}
+// IFMA patterns - for cases where we can safely ignore the overflow bits from
+// the multiply or easily match with existing intrinsics.
+let Predicates = [HasXOP] in {
+ def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
+ (v8i16 VR128:$src3))),
+ (VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
+ (v4i32 VR128:$src3))),
+ (VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v2i64 (add (X86pmuldq (X86PShufd (v4i32 VR128:$src1), (i8 -11)),
+ (X86PShufd (v4i32 VR128:$src2), (i8 -11))),
+ (v2i64 VR128:$src3))),
+ (VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v2i64 (add (X86pmuldq (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
+ (v2i64 VR128:$src3))),
+ (VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
+ (v4i32 VR128:$src3))),
+ (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+}
+
// Instruction where second source can be memory, third must be imm8
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128> {
let isCommutable = 1 in
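
The new HasXOP patterns above recognise a plain multiply-accumulate where only the low bits of the product are needed, which is exactly the case the comment describes as safe to map onto VPMACSWW/VPMACSDD (and the pmuldq/pmaddwd shapes onto VPMACSDQH/VPMACSDQL/VPMADCSWD). Roughly, they cover loop shapes like the following scalar C++ stand-ins; this only illustrates the matched semantics and is not code from the patch:

#include <cstddef>
#include <cstdint>

// d[i] = a[i] * b[i] + c[i], keeping only the low 16 bits of the product
// (overflow ignored): the shape the v8i16 pattern maps onto VPMACSWW.
void mac_i16(int16_t *d, const int16_t *a, const int16_t *b,
             const int16_t *c, size_t n) {
  for (size_t i = 0; i != n; ++i)
    d[i] = static_cast<int16_t>(a[i] * b[i] + c[i]);
}

// Same idea for 32-bit lanes (VPMACSDD); the widening here only avoids
// undefined behaviour in C++, the low 32 bits are what gets kept.
void mac_i32(int32_t *d, const int32_t *a, const int32_t *b,
             const int32_t *c, size_t n) {
  for (size_t i = 0; i != n; ++i)
    d[i] = static_cast<int32_t>(static_cast<int64_t>(a[i]) * b[i] + c[i]);
}
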
@@ -269,159 +290,87 @@ let ExeDomain = SSEPackedInt in {
}
// Instruction where either second or third source can be memory
-multiclass xop4op_int<bits<8> opc, string OpcodeStr,
- Intrinsic Int128, Intrinsic Int256> {
- // 128-bit Instruction
- def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT> {
+ def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>,
- XOP_4V;
- def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
+ (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V;
+ def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, VR128:$src2,
- (bitconvert (loadv2i64 addr:$src3))))]>,
+ [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
+ (X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W;
- def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>,
+ [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
+ (X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W;
-
- // 256-bit Instruction
- def rrrY : IXOPi8Reg<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>,
- XOP_4V, VEX_L;
- def rrmY : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2,
- (bitconvert (loadv4i64 addr:$src3))))]>,
- XOP_4V, VEX_W, VEX_L;
- def rmrY : IXOPi8Reg<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2)),
- VR256:$src3))]>,
- XOP_4V, VEX_L;
- // For disassembler
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrrY_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W, VEX_L;
}
let ExeDomain = SSEPackedInt in {
- defm VPCMOV : xop4op_int<0xA2, "vpcmov",
- int_x86_xop_vpcmov, int_x86_xop_vpcmov_256>;
+ defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64>;
+ defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64>, VEX_L;
}
-let Predicates = [HasXOP] in {
- def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
- (X86andnp VR128:$src3, VR128:$src2))),
- (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
-
- def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
- (X86andnp VR256:$src3, VR256:$src2))),
- (VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
-}
-
-multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType vt128, ValueType vt256,
- ValueType id128, ValueType id256,
- PatFrag ld_128, PatFrag ld_256> {
- def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
+multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand intmemop, X86MemOperand fpmemop,
+ ValueType VT, PatFrag FPLdFrag,
+ PatFrag IntLdFrag> {
+ def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
- def rm : IXOP5<opc, MRMSrcMemOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>;
+ def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 (bitconvert (loadv2i64 addr:$src3))),
- (i8 imm:$src4))))]>,
- VEX_W;
- def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2,
+ (bitconvert (IntLdFrag addr:$src3)),
+ (i8 imm:$src4))))]>, VEX_W;
+ def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (ld_128 addr:$src2))),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
+ RC:$src3, (i8 imm:$src4))))]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rr_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
+ def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W;
-
- def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- def rmY : IXOP5<opc, MRMSrcMemOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 (bitconvert (loadv4i64 addr:$src3))),
- (i8 imm:$src4))))]>, VEX_W, VEX_L;
- def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1),
- (vt256 (bitconvert (ld_256 addr:$src2))),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- // For disassembler
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrY_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, VEX_L;
}
-let ExeDomain = SSEPackedDouble in
- defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64,
- v2i64, v4i64, loadv2f64, loadv4f64>;
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
+ v2f64, loadv2f64, loadv2i64>;
+ defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
+ v4f64, loadv4f64, loadv4i64>, VEX_L;
+}
-let ExeDomain = SSEPackedSingle in
- defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32,
- v4i32, v8i32, loadv4f32, loadv8f32>;
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
+ v4f32, loadv4f32, loadv2i64>;
+ defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
+ v8f32, loadv8f32, loadv4i64>, VEX_L;
+}
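
The rewritten xop4op_int multiclass selects VPCMOV directly from the generic bitwise-select DAG, (or (and C, A), (X86andnp C, B)), instead of going through the vpcmov intrinsics, so the standalone HasXOP Pat<> definitions are folded away and the 128/256-bit variants become separate instantiations (likewise xop_vpermil2 for VPERMIL2PS/PD). The per-bit semantics being matched reduce to the following; a scalar 64-bit stand-in for illustration only:

#include <cstdint>

// For every bit: take the bit from A where the selector C is 1, and from B
// where it is 0. This is the (or (and C, A), (X86andnp C, B)) shape matched
// by the pattern above.
static inline uint64_t pcmov(uint64_t a, uint64_t b, uint64_t c) {
  return (c & a) | (~c & b);
}
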
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
new file mode 100644
index 000000000000..6cc5e8b63597
--- /dev/null
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -0,0 +1,516 @@
+//===- X86InstructionSelector.cpp ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86RegisterBankInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "X86-isel"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+namespace {
+
+class X86InstructionSelector : public InstructionSelector {
+public:
+ X86InstructionSelector(const X86Subtarget &STI,
+ const X86RegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+
+private:
+ /// tblgen-erated 'select' implementation, used as the initial selector for
+ /// the patterns that don't require complex C++.
+ bool selectImpl(MachineInstr &I) const;
+
+  // TODO: remove once selectImpl supports patterns with predicates.
+ unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc,
+ uint64_t Alignment) const;
+
+ bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectFrameIndex(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+
+ const X86Subtarget &STI;
+ const X86InstrInfo &TII;
+ const X86RegisterInfo &TRI;
+ const X86RegisterBankInfo &RBI;
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
+ const X86RegisterBankInfo &RBI)
+ : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI)
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+// FIXME: This should be target-independent, inferred from the types declared
+// for each class in the bank.
+static const TargetRegisterClass *
+getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
+ if (RB.getID() == X86::GPRRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &X86::GR32RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::GR64RegClass;
+ }
+ if (RB.getID() == X86::VECRRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &X86::FR32XRegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::FR64XRegClass;
+ if (Ty.getSizeInBits() == 128)
+ return &X86::VR128XRegClass;
+ if (Ty.getSizeInBits() == 256)
+ return &X86::VR256XRegClass;
+ if (Ty.getSizeInBits() == 512)
+ return &X86::VR512RegClass;
+ }
+
+ llvm_unreachable("Unknown RegBank!");
+}
+
+// Set X86 Opcode and constrain DestReg.
+static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+
+ unsigned DstReg = I.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ assert(I.isCopy() && "Generic operators do not allow physical registers");
+ return true;
+ }
+
+ const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ (void)DstSize;
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ (void)SrcSize;
+ assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
+ "No phys reg on generic operators");
+ assert((DstSize == SrcSize ||
+          // Copies are a means to set up initial types; the number of
+          // bits may not match exactly.
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
+ "Copy with different width?!");
+
+ const TargetRegisterClass *RC = nullptr;
+
+ switch (RegBank.getID()) {
+ case X86::GPRRegBankID:
+ assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values.");
+ RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+ break;
+ case X86::VECRRegBankID:
+ RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+ break;
+ default:
+ llvm_unreachable("Unknown RegBank!");
+ }
+
+  // No need to constrain SrcReg. It will get constrained when
+  // we hit another of its uses or defs.
+ // Copies do not have constraints.
+ const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
+ if (!OldRC || !RC->hasSubClassEq(OldRC)) {
+ if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+ << " operand\n");
+ return false;
+ }
+ }
+ I.setDesc(TII.get(X86::COPY));
+ return true;
+}
+
+bool X86InstructionSelector::select(MachineInstr &I) const {
+ assert(I.getParent() && "Instruction should be in a basic block!");
+ assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Opcode = I.getOpcode();
+ if (!isPreISelGenericOpcode(Opcode)) {
+ // Certain non-generic instructions also need some special handling.
+
+ if (I.isCopy())
+ return selectCopy(I, TII, MRI, TRI, RBI);
+
+ // TODO: handle more cases - LOAD_STACK_GUARD, PHI
+ return true;
+ }
+
+ assert(I.getNumOperands() == I.getNumExplicitOperands() &&
+ "Generic instruction has unexpected implicit operands\n");
+
+  // TODO: This should be implemented by tblgen; patterns with predicates are
+  // not supported yet.
+ if (selectBinaryOp(I, MRI, MF))
+ return true;
+ if (selectLoadStoreOp(I, MRI, MF))
+ return true;
+ if (selectFrameIndex(I, MRI, MF))
+ return true;
+ if (selectConstant(I, MRI, MF))
+ return true;
+
+ return selectImpl(I);
+}
+
+unsigned X86InstructionSelector::getFAddOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_FADD;
+
+ if (Ty == LLT::scalar(32)) {
+ if (STI.hasAVX512()) {
+ return X86::VADDSSZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDSSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::ADDSSrr;
+ }
+ } else if (Ty == LLT::scalar(64)) {
+ if (STI.hasAVX512()) {
+ return X86::VADDSDZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDSDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::ADDSDrr;
+ }
+ } else if (Ty == LLT::vector(4, 32)) {
+ if ((STI.hasAVX512()) && (STI.hasVLX())) {
+ return X86::VADDPSZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDPSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::ADDPSrr;
+ }
+ }
+
+ return TargetOpcode::G_FADD;
+}
+
+unsigned X86InstructionSelector::getFSubOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_FSUB;
+
+ if (Ty == LLT::scalar(32)) {
+ if (STI.hasAVX512()) {
+ return X86::VSUBSSZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBSSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::SUBSSrr;
+ }
+ } else if (Ty == LLT::scalar(64)) {
+ if (STI.hasAVX512()) {
+ return X86::VSUBSDZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBSDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::SUBSDrr;
+ }
+ } else if (Ty == LLT::vector(4, 32)) {
+ if ((STI.hasAVX512()) && (STI.hasVLX())) {
+ return X86::VSUBPSZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBPSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::SUBPSrr;
+ }
+ }
+
+ return TargetOpcode::G_FSUB;
+}
+
+unsigned X86InstructionSelector::getAddOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_ADD;
+
+ if (Ty == LLT::vector(4, 32)) {
+ if (STI.hasAVX512() && STI.hasVLX()) {
+ return X86::VPADDDZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VPADDDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::PADDDrr;
+ }
+ }
+
+ return TargetOpcode::G_ADD;
+}
+
+unsigned X86InstructionSelector::getSubOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_SUB;
+
+ if (Ty == LLT::vector(4, 32)) {
+ if (STI.hasAVX512() && STI.hasVLX()) {
+ return X86::VPSUBDZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VPSUBDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::PSUBDrr;
+ }
+ }
+
+ return TargetOpcode::G_SUB;
+}
+
+bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ unsigned NewOpc = I.getOpcode();
+
+ switch (NewOpc) {
+ case TargetOpcode::G_FADD:
+ NewOpc = getFAddOp(Ty, RB);
+ break;
+ case TargetOpcode::G_FSUB:
+ NewOpc = getFSubOp(Ty, RB);
+ break;
+ case TargetOpcode::G_ADD:
+ NewOpc = getAddOp(Ty, RB);
+ break;
+ case TargetOpcode::G_SUB:
+ NewOpc = getSubOp(Ty, RB);
+ break;
+ default:
+ break;
+ }
+
+ if (NewOpc == I.getOpcode())
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
+ unsigned Opc,
+ uint64_t Alignment) const {
+ bool Isload = (Opc == TargetOpcode::G_LOAD);
+ bool HasAVX = STI.hasAVX();
+ bool HasAVX512 = STI.hasAVX512();
+ bool HasVLX = STI.hasVLX();
+
+ if (Ty == LLT::scalar(8)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV8rm : X86::MOV8mr;
+ } else if (Ty == LLT::scalar(16)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV16rm : X86::MOV16mr;
+ } else if (Ty == LLT::scalar(32)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::VECRRegBankID == RB.getID())
+ return Isload ? (HasAVX512 ? X86::VMOVSSZrm
+ : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm)
+ : (HasAVX512 ? X86::VMOVSSZmr
+ : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ } else if (Ty == LLT::scalar(64)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::VECRRegBankID == RB.getID())
+ return Isload ? (HasAVX512 ? X86::VMOVSDZrm
+ : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm)
+ : (HasAVX512 ? X86::VMOVSDZmr
+ : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
+ if (Alignment >= 16)
+ return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
+ : HasAVX512
+ ? X86::VMOVAPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
+ : (HasVLX ? X86::VMOVAPSZ128mr
+ : HasAVX512
+ ? X86::VMOVAPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
+ else
+ return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
+ : HasAVX512
+ ? X86::VMOVUPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
+ : (HasVLX ? X86::VMOVUPSZ128mr
+ : HasAVX512
+ ? X86::VMOVUPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ }
+ return Opc;
+}
+
+bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ unsigned Opc = I.getOpcode();
+
+ if (Opc != TargetOpcode::G_STORE && Opc != TargetOpcode::G_LOAD)
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ auto &MemOp = **I.memoperands_begin();
+ unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
+ if (NewOpc == Opc)
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+ MachineInstrBuilder MIB(MF, I);
+ if (Opc == TargetOpcode::G_LOAD)
+ addOffset(MIB, 0);
+ else {
+    // G_STORE takes (VAL, Addr); the X86 store instructions take (Addr, VAL).
+ I.RemoveOperand(0);
+ addOffset(MIB, 0).addUse(DefReg);
+ }
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool X86InstructionSelector::selectFrameIndex(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_FRAME_INDEX)
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+
+ // Use LEA to calculate frame index.
+ unsigned NewOpc;
+ if (Ty == LLT::pointer(0, 64))
+ NewOpc = X86::LEA64r;
+ else if (Ty == LLT::pointer(0, 32))
+ NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
+ else
+ llvm_unreachable("Can't select G_FRAME_INDEX, unsupported type.");
+
+ I.setDesc(TII.get(NewOpc));
+ MachineInstrBuilder MIB(MF, I);
+ addOffset(MIB, 0);
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool X86InstructionSelector::selectConstant(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+
+ assert(Ty.isScalar() && "invalid element type.");
+
+ uint64_t Val = 0;
+ if (I.getOperand(1).isCImm()) {
+ Val = I.getOperand(1).getCImm()->getZExtValue();
+ I.getOperand(1).ChangeToImmediate(Val);
+ } else if (I.getOperand(1).isImm()) {
+ Val = I.getOperand(1).getImm();
+ } else
+ llvm_unreachable("Unsupported operand type.");
+
+ unsigned NewOpc;
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ NewOpc = X86::MOV8ri;
+ break;
+ case 16:
+ NewOpc = X86::MOV16ri;
+ break;
+ case 32:
+ NewOpc = X86::MOV32ri;
+ break;
+ case 64: {
+    // TODO: when isUInt<32>(Val), X86::MOV32ri could be used instead.
+ if (isInt<32>(Val))
+ NewOpc = X86::MOV64ri32;
+ else
+ NewOpc = X86::MOV64ri;
+ break;
+ }
+ default:
+ llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
+ }
+
+ I.setDesc(TII.get(NewOpc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+InstructionSelector *
+llvm::createX86InstructionSelector(X86Subtarget &Subtarget,
+ X86RegisterBankInfo &RBI) {
+ return new X86InstructionSelector(Subtarget, RBI);
+}
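
For context, the object returned by createX86InstructionSelector is driven by the generic GlobalISel InstructionSelect pass; very roughly it does something like the sketch below (the real pass walks blocks bottom-up and copes with instructions that selection erases, all of which is elided here):

#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include <vector>

// Simplified sketch only: select every instruction in the function and
// report failure if the selector rejects one (the pass would then emit a
// diagnostic or fall back to another selector).
static bool selectAll(llvm::MachineFunction &MF,
                      llvm::InstructionSelector &ISel) {
  std::vector<llvm::MachineInstr *> Worklist;
  for (llvm::MachineBasicBlock &MBB : MF)
    for (llvm::MachineInstr &MI : MBB)
      Worklist.push_back(&MI);
  for (llvm::MachineInstr *MI : Worklist)
    if (!ISel.select(*MI))
      return false;
  return true;
}
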
diff --git a/lib/Target/X86/X86InterleavedAccess.cpp b/lib/Target/X86/X86InterleavedAccess.cpp
index d9edf4676faf..806d6cc888f0 100644
--- a/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/lib/Target/X86/X86InterleavedAccess.cpp
@@ -19,6 +19,7 @@
using namespace llvm;
+namespace {
/// \brief This class holds necessary information to represent an interleaved
/// access group and supports utilities to lower the group into
/// X86-specific instructions/intrinsics.
@@ -27,7 +28,6 @@ using namespace llvm;
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
/// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
-
class X86InterleavedAccessGroup {
/// \brief Reference to the wide-load instruction of an interleaved access
/// group.
@@ -95,6 +95,7 @@ public:
/// instructions/intrinsics.
bool lowerIntoOptimizedSequence();
};
+} // end anonymous namespace
bool X86InterleavedAccessGroup::isSupported() const {
VectorType *ShuffleVecTy = Shuffles[0]->getType();
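
Wrapping X86InterleavedAccessGroup in an anonymous namespace gives the class internal linkage, so it cannot clash with a same-named helper in another translation unit. A minimal illustration of the effect, with made-up names:

namespace {
// Visible only inside this translation unit.
class Helper {
public:
  int value() const { return 42; }
};
} // end anonymous namespace

int useHelper() { return Helper().value(); }
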
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 63a02af02faa..2a40399ba571 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t {
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
- FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
+ FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP,
};
struct IntrinsicData {
@@ -67,6 +67,23 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, X86::VGATHERDPDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, X86::VGATHERDPSrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, X86::VGATHERDPSYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, X86::VPGATHERDQrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, X86::VPGATHERDQYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, X86::VPGATHERQDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, X86::VPGATHERQDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, X86::VGATHERQPDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, X86::VGATHERQPDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, X86::VGATHERQPSrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, X86::VGATHERQPSYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, X86::VPGATHERQQrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, X86::VPGATHERQQYrm, 0),
+
X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
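
The new IntrinsicsWithChain entries route the AVX2 gather intrinsics through the shared GATHER_AVX2 lowering onto the listed VPGATHER*/VGATHER* opcodes. At the source level these come from the usual gather intrinsics; for example, the following (compiled with -mavx2) should reach the llvm.x86.avx2.gather.d.d.256 entry, which this table now maps to VPGATHERDDYrm. Illustrative example only:

#include <immintrin.h>

// Gather eight 32-bit ints from base + 4*idx[i] for each lane of idx.
__m256i gather8(const int *base, __m256i idx) {
  return _mm256_i32gather_epi32(base, idx, /*scale=*/4);
}
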
@@ -325,6 +342,8 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
+ X86_INTRINSIC_DATA(avx_cmp_ps_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
@@ -351,9 +370,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -421,18 +440,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2b_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2b_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2b_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2d_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2d_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2d_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2q_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2q_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2q_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2w_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2w_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2w_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
@@ -455,18 +462,20 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
-
+ X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FADD_RND, 0),
+ X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FADD_RND, 0),
+ X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, BRCST32x2_TO_VEC,
X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, BRCST32x2_TO_VEC,
@@ -720,9 +729,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FDIV_RND, 0),
+ X86ISD::FDIVS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FDIV_RND, 0),
+ X86ISD::FDIVS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -795,74 +804,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANTS, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_512, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_128, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_256, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMAX_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMAX_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMAXS, X86ISD::FMAXS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMAXS, X86ISD::FMAXS_RND),
X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMIN_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMIN_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMINS, X86ISD::FMINS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMINS, X86ISD::FMINS_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMUL_RND, 0),
+ X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMUL_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packsswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packsswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packsswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packusdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packusdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packusdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packuswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packuswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packuswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86ISD::FMULS_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
@@ -1191,9 +1168,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSUB_RND, 0),
+ X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSUB_RND, 0),
+ X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
@@ -1486,6 +1463,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ,
X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_pmul_dq_512, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_pmulu_dq_512, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
@@ -1613,6 +1594,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
@@ -1620,7 +1602,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse_min_ss, INTR_TYPE_2OP, X86ISD::FMINS, 0),
X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
@@ -1631,6 +1615,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_ucomile_ss, COMI, X86ISD::UCOMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse_ucomilt_ss, COMI, X86ISD::UCOMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_ucomineq_ss, COMI, X86ISD::UCOMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse2_cmp_pd, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
@@ -1643,7 +1628,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse2_min_sd, INTR_TYPE_2OP, X86ISD::FMINS, 0),
X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
@@ -1696,9 +1683,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
new file mode 100644
index 000000000000..c2dc762fec5e
--- /dev/null
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -0,0 +1,142 @@
+//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "X86LegalizerInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetOpcodes.h"
+
+using namespace llvm;
+using namespace TargetOpcode;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
+ const X86TargetMachine &TM)
+ : Subtarget(STI), TM(TM) {
+
+ setLegalizerInfo32bit();
+ setLegalizerInfo64bit();
+ setLegalizerInfoSSE1();
+ setLegalizerInfoSSE2();
+
+ computeTables();
+}
+
+void X86LegalizerInfo::setLegalizerInfo32bit() {
+
+ if (Subtarget.is64Bit())
+ return;
+
+ const LLT p0 = LLT::pointer(0, 32);
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {s8, s16, s32})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s8, s16, s32, p0})
+ setAction({MemOp, Ty}, Legal);
+
+ // And everything's fine in addrspace 0.
+ setAction({MemOp, 1, p0}, Legal);
+ }
+
+ // Pointer-handling
+ setAction({G_FRAME_INDEX, p0}, Legal);
+
+ // Constants
+ for (auto Ty : {s8, s16, s32, p0})
+ setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
+
+ setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
+ setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar);
+}
+
+void X86LegalizerInfo::setLegalizerInfo64bit() {
+
+ if (!Subtarget.is64Bit())
+ return;
+
+ const LLT p0 = LLT::pointer(0, TM.getPointerSize() * 8);
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {s8, s16, s32, s64})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({MemOp, Ty}, Legal);
+
+ // And everything's fine in addrspace 0.
+ setAction({MemOp, 1, p0}, Legal);
+ }
+
+ // Pointer-handling
+ setAction({G_FRAME_INDEX, p0}, Legal);
+
+ // Constants
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
+
+ setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
+}
+
+void X86LegalizerInfo::setLegalizerInfoSSE1() {
+ if (!Subtarget.hasSSE1())
+ return;
+
+ const LLT s32 = LLT::scalar(32);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (auto Ty : {s32, v4s32})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE})
+ for (auto Ty : {v4s32, v2s64})
+ setAction({MemOp, Ty}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoSSE2() {
+ if (!Subtarget.hasSSE2())
+ return;
+
+ const LLT s64 = LLT::scalar(64);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (auto Ty : {s64, v2s64})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v4s32})
+ setAction({BinOp, Ty}, Legal);
+}
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
new file mode 100644
index 000000000000..3f00898b4232
--- /dev/null
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -0,0 +1,43 @@
+//===- X86LegalizerInfo.h ----------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86MACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_X86_X86MACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class X86Subtarget;
+class X86TargetMachine;
+
+/// This class provides the legalization information for the X86 target.
+class X86LegalizerInfo : public LegalizerInfo {
+private:
+ /// Keep a reference to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget &Subtarget;
+ const X86TargetMachine &TM;
+
+public:
+ X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);
+
+private:
+ void setLegalizerInfo32bit();
+ void setLegalizerInfo64bit();
+ void setLegalizerInfoSSE1();
+ void setLegalizerInfoSSE2();
+};
+} // namespace llvm
+#endif
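For orientation, a minimal sketch of how the new legalizer info could be owned by a GlobalISel setup. The helper below and its placement are hypothetical and not part of this diff; only the X86LegalizerInfo constructor signature comes from the files above.

    // Hypothetical helper (assumed name createX86Legalizer): the action tables
    // are populated inside the X86LegalizerInfo constructor via
    // setLegalizerInfo32bit/64bit/SSE1/SSE2 followed by computeTables().
    #include "X86LegalizerInfo.h"
    #include "X86Subtarget.h"
    #include "X86TargetMachine.h"
    #include "llvm/ADT/STLExtras.h" // llvm::make_unique
    #include <memory>

    static std::unique_ptr<llvm::LegalizerInfo>
    createX86Legalizer(const llvm::X86Subtarget &STI,
                       const llvm::X86TargetMachine &TM) {
      return llvm::make_unique<llvm::X86LegalizerInfo>(STI, TM);
    }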
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index feeb2fd5993c..550e3543a71e 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -102,7 +102,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
}
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
+ OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo);
SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}
@@ -215,6 +215,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break;
case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
+ case X86II::MO_ABS8: RefKind = MCSymbolRefExpr::VK_X86_ABS8; break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
Expr = MCSymbolRefExpr::create(Sym, Ctx);
@@ -357,7 +358,7 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
const MachineOperand &MO) const {
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
@@ -498,11 +499,16 @@ ReSimplify:
break;
}
- // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction.
+  // TAILJMPd, TAILJMPd64, TAILJMPd_CC, TAILJMPd64_CC - Lower to the correct jump instruction.
{ unsigned Opcode;
case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode;
+ case X86::TAILJMPd_CC:
+ case X86::TAILJMPd64_CC:
+ Opcode = X86::GetCondBranchFromCond(
+ static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
+ goto SetTailJmpOpcode;
SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
@@ -888,30 +894,47 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
SM.recordStatepoint(MI);
}
-void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
- X86MCInstLower &MCIL) {
- // FAULTING_LOAD_OP <def>, <MBB handler>, <load opcode>, <load operands>
+void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
+ X86MCInstLower &MCIL) {
+  // FAULTING_OP <def>, <faulting type>, <MBB handler>,
+  //             <opcode>, <operands>
- unsigned LoadDefRegister = MI.getOperand(0).getReg();
- MCSymbol *HandlerLabel = MI.getOperand(1).getMBB()->getSymbol();
- unsigned LoadOpcode = MI.getOperand(2).getImm();
- unsigned LoadOperandsBeginIdx = 3;
+ unsigned DefRegister = FaultingMI.getOperand(0).getReg();
+ FaultMaps::FaultKind FK =
+ static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
+ MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
+ unsigned Opcode = FaultingMI.getOperand(3).getImm();
+ unsigned OperandsBeginIdx = 4;
- FM.recordFaultingOp(FaultMaps::FaultingLoad, HandlerLabel);
+ assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
+ FM.recordFaultingOp(FK, HandlerLabel);
- MCInst LoadMI;
- LoadMI.setOpcode(LoadOpcode);
+ MCInst MI;
+ MI.setOpcode(Opcode);
- if (LoadDefRegister != X86::NoRegister)
- LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+ if (DefRegister != X86::NoRegister)
+ MI.addOperand(MCOperand::createReg(DefRegister));
- for (auto I = MI.operands_begin() + LoadOperandsBeginIdx,
- E = MI.operands_end();
+ for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
+ E = FaultingMI.operands_end();
I != E; ++I)
- if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, *I))
- LoadMI.addOperand(MaybeOperand.getValue());
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
+ MI.addOperand(MaybeOperand.getValue());
+
+ OutStreamer->EmitInstruction(MI, getSubtargetInfo());
+}
- OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo());
+void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ bool Is64Bits = Subtarget->is64Bit();
+ MCContext &Ctx = OutStreamer->getContext();
+ MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
+ const MCSymbolRefExpr *Op =
+ MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
+
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
+ .addExpr(Op));
}
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
@@ -1276,9 +1299,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
+ case X86::TAILJMPd_CC:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPd64:
+ case X86::TAILJMPd64_CC:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
// Lower these as normal, but add some comments.
@@ -1367,8 +1392,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::STATEPOINT:
return LowerSTATEPOINT(*MI, MCInstLowering);
- case TargetOpcode::FAULTING_LOAD_OP:
- return LowerFAULTING_LOAD_OP(*MI, MCInstLowering);
+ case TargetOpcode::FAULTING_OP:
+ return LowerFAULTING_OP(*MI, MCInstLowering);
+
+ case TargetOpcode::FENTRY_CALL:
+ return LowerFENTRY_CALL(*MI, MCInstLowering);
case TargetOpcode::PATCHABLE_OP:
return LowerPATCHABLE_OP(*MI, MCInstLowering);
@@ -1501,7 +1529,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1572,15 +1601,16 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
case X86::VPERMIL2PDrm:
case X86::VPERMIL2PSrm:
- case X86::VPERMIL2PDrmY:
- case X86::VPERMIL2PSrmY: {
+ case X86::VPERMIL2PDYrm:
+ case X86::VPERMIL2PSYrm: {
if (!OutStreamer->isVerboseAsm())
break;
assert(MI->getNumOperands() >= 8 &&
@@ -1593,8 +1623,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned ElSize;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
- case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
+ case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
+ case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
}
const MachineOperand &MaskOp = MI->getOperand(6);
@@ -1602,7 +1632,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1618,7 +1649,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1678,7 +1710,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CS << "?";
}
CS << "]";
- OutStreamer->AddComment(CS.str());
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
@@ -1710,7 +1742,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << ">";
- OutStreamer->AddComment(CS.str());
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
}
}
break;
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
index c9e636f1eb00..3fcb642424ad 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -9,6 +9,7 @@
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -20,11 +21,8 @@ void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) {
const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
- for (const MCPhysReg *CSR =
- RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF);
- unsigned Reg = *CSR;
- ++CSR)
- {
+ for (const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
+ unsigned Reg = *CSR; ++CSR) {
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
RestoreBasePointerOffset -= SlotSize;
}
diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp
new file mode 100644
index 000000000000..dd21e2b7c4a1
--- /dev/null
+++ b/lib/Target/X86/X86MacroFusion.cpp
@@ -0,0 +1,271 @@
+//===- X86MacroFusion.cpp - X86 Macro Fusion ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// \file This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the DAG scheduling mutation to
+// pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MacroFusion.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+namespace {
+
+/// \brief Verify that the instruction pair, First and Second,
+/// should be scheduled back to back. If either instruction is unspecified,
+/// then verify that the other instruction may be part of a pair at all.
+static bool shouldScheduleAdjacent(const X86Subtarget &ST,
+ const MachineInstr *First,
+ const MachineInstr *Second) {
+ // Check if this processor supports macro-fusion. Since this is a minor
+ // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
+ // proxy for SandyBridge+.
+ if (!ST.hasAVX())
+ return false;
+
+ enum {
+ FuseTest,
+ FuseCmp,
+ FuseInc
+ } FuseKind;
+
+ assert((First || Second) && "At least one instr must be specified");
+ unsigned FirstOpcode = First
+ ? First->getOpcode()
+ : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = Second
+ ? Second->getOpcode()
+ : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
+
+ switch (SecondOpcode) {
+ default:
+ return false;
+ case X86::JE_1:
+ case X86::JNE_1:
+ case X86::JL_1:
+ case X86::JLE_1:
+ case X86::JG_1:
+ case X86::JGE_1:
+ FuseKind = FuseInc;
+ break;
+ case X86::JB_1:
+ case X86::JBE_1:
+ case X86::JA_1:
+ case X86::JAE_1:
+ FuseKind = FuseCmp;
+ break;
+ case X86::JS_1:
+ case X86::JNS_1:
+ case X86::JP_1:
+ case X86::JNP_1:
+ case X86::JO_1:
+ case X86::JNO_1:
+ FuseKind = FuseTest;
+ break;
+ }
+
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ case X86::TEST8ri:
+ case X86::TEST16ri:
+ case X86::TEST32ri:
+ case X86::TEST32i32:
+ case X86::TEST64i32:
+ case X86::TEST64ri32:
+ case X86::TEST8rm:
+ case X86::TEST16rm:
+ case X86::TEST32rm:
+ case X86::TEST64rm:
+ case X86::TEST8ri_NOREX:
+ case X86::AND16i16:
+ case X86::AND16ri:
+ case X86::AND16ri8:
+ case X86::AND16rm:
+ case X86::AND16rr:
+ case X86::AND32i32:
+ case X86::AND32ri:
+ case X86::AND32ri8:
+ case X86::AND32rm:
+ case X86::AND32rr:
+ case X86::AND64i32:
+ case X86::AND64ri32:
+ case X86::AND64ri8:
+ case X86::AND64rm:
+ case X86::AND64rr:
+ case X86::AND8i8:
+ case X86::AND8ri:
+ case X86::AND8rm:
+ case X86::AND8rr:
+ return true;
+ case X86::CMP16i16:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP16rm:
+ case X86::CMP16rr:
+ case X86::CMP32i32:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP32rm:
+ case X86::CMP32rr:
+ case X86::CMP64i32:
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP64rm:
+ case X86::CMP64rr:
+ case X86::CMP8i8:
+ case X86::CMP8ri:
+ case X86::CMP8rm:
+ case X86::CMP8rr:
+ case X86::ADD16i16:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri8_DB:
+ case X86::ADD16ri_DB:
+ case X86::ADD16rm:
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB:
+ case X86::ADD32i32:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri8_DB:
+ case X86::ADD32ri_DB:
+ case X86::ADD32rm:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB:
+ case X86::ADD64i32:
+ case X86::ADD64ri32:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8:
+ case X86::ADD64ri8_DB:
+ case X86::ADD64rm:
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD8i8:
+ case X86::ADD8mi:
+ case X86::ADD8mr:
+ case X86::ADD8ri:
+ case X86::ADD8rm:
+ case X86::ADD8rr:
+ case X86::SUB16i16:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB16rm:
+ case X86::SUB16rr:
+ case X86::SUB32i32:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB32rm:
+ case X86::SUB32rr:
+ case X86::SUB64i32:
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB64rm:
+ case X86::SUB64rr:
+ case X86::SUB8i8:
+ case X86::SUB8ri:
+ case X86::SUB8rm:
+ case X86::SUB8rr:
+ return FuseKind == FuseCmp || FuseKind == FuseInc;
+ case X86::INC16r:
+ case X86::INC32r:
+ case X86::INC64r:
+ case X86::INC8r:
+ case X86::DEC16r:
+ case X86::DEC32r:
+ case X86::DEC64r:
+ case X86::DEC8r:
+ return FuseKind == FuseInc;
+ case X86::INSTRUCTION_LIST_END:
+ return true;
+ }
+}
+
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class X86MacroFusion : public ScheduleDAGMutation {
+public:
+ X86MacroFusion() {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+ const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>();
+
+ // For now, assume targets can only fuse with the branch.
+ SUnit &ExitSU = DAG->ExitSU;
+ MachineInstr *Branch = ExitSU.getInstr();
+ if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch))
+ return;
+
+ for (SDep &PredDep : ExitSU.Preds) {
+ if (PredDep.isWeak())
+ continue;
+ SUnit &SU = *PredDep.getSUnit();
+ MachineInstr &Pred = *SU.getInstr();
+ if (!shouldScheduleAdjacent(ST, &Pred, Branch))
+ continue;
+
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ // Adjust latency of data deps between the nodes.
+ for (SDep &PredDep : ExitSU.Preds)
+ if (PredDep.getSUnit() == &SU)
+ PredDep.setLatency(0);
+ for (SDep &SuccDep : SU.Succs)
+ if (SuccDep.getSUnit() == &ExitSU)
+ SuccDep.setLatency(0);
+
+ ++NumFused;
+ DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
+ SU.print(dbgs(), DAG);
+ dbgs() << " - ExitSU"
+ << " / " << DAG->TII->getName(Pred.getOpcode()) << " - "
+ << DAG->TII->getName(Branch->getOpcode()) << '\n';);
+
+ break;
+ }
+}
+
+} // end namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createX86MacroFusionDAGMutation() {
+ return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr;
+}
+
+} // end namespace llvm
diff --git a/lib/Target/X86/X86MacroFusion.h b/lib/Target/X86/X86MacroFusion.h
new file mode 100644
index 000000000000..e630f802e8e6
--- /dev/null
+++ b/lib/Target/X86/X86MacroFusion.h
@@ -0,0 +1,30 @@
+//===- X86MacroFusion.h - X86 Macro Fusion --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// \file This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 definition of the DAG scheduling mutation to pair
+// instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+
+//===----------------------------------------------------------------------===//
+// X86MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createX86MacroFusionDAGMutation());
+/// to X86PassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation>
+createX86MacroFusionDAGMutation();
+
+} // end namespace llvm
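As the note above says, the mutation only takes effect once the pass config registers it. A sketch of that wiring follows; the X86PassConfig override shown here is assumed, not part of this diff, while createGenericSchedLive and addMutation are the existing MachineScheduler hooks.

    // Hypothetical override in X86PassConfig (X86TargetMachine.cpp); guarded
    // so the null mutation returned when -x86-misched-fusion is off is skipped.
    ScheduleDAGInstrs *
    X86PassConfig::createMachineScheduler(MachineSchedContext *C) const {
      ScheduleDAGMILive *DAG = createGenericSchedLive(C);
      if (auto Fusion = createX86MacroFusionDAGMutation())
        DAG->addMutation(std::move(Fusion));
      return DAG;
    }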
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index e1447006cd18..debb192732e5 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -389,9 +389,6 @@ bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
assert(isLEA(First) && isLEA(Last) &&
"The function works only with LEA instructions");
- // Get new address displacement.
- AddrDispShift = getAddrDispShift(Last, 1, First, 1);
-
// Make sure that LEA def registers belong to the same class. There may be
// instructions (like MOV8mr_NOREX) which allow a limited set of registers to
// be used as their operands, so we must be sure that replacing one LEA
@@ -400,10 +397,13 @@ bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
MRI->getRegClass(Last.getOperand(0).getReg()))
return false;
+ // Get new address displacement.
+ AddrDispShift = getAddrDispShift(Last, 1, First, 1);
+
// Loop over all uses of the Last LEA to check that its def register is
// used only as address base for memory accesses. If so, it can be
// replaced, otherwise - no.
- for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) {
+ for (auto &MO : MRI->use_nodbg_operands(Last.getOperand(0).getReg())) {
MachineInstr &MI = *MO.getParent();
// Get the number of the first memory operand.
@@ -563,8 +563,9 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
// Loop over all uses of the Last LEA and update their operands. Note
// that the correctness of this has already been checked in the
// isReplaceable function.
- for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()),
- UE = MRI->use_end();
+ unsigned LastVReg = Last.getOperand(0).getReg();
+ for (auto UI = MRI->use_nodbg_begin(LastVReg),
+ UE = MRI->use_nodbg_end();
UI != UE;) {
MachineOperand &MO = *UI++;
MachineInstr &MI = *MO.getParent();
@@ -586,6 +587,9 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
Op.setOffset(Op.getOffset() + AddrDispShift);
}
+ // Mark debug values referring to Last LEA as undefined.
+ MRI->markUsesInDebugValueAsUndef(LastVReg);
+
// Since we can possibly extend register lifetime, clear kill flags.
MRI->clearKillFlags(First.getOperand(0).getReg());
@@ -594,7 +598,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
// By this moment, all of the Last LEA's uses must be replaced. So we
// can freely remove it.
- assert(MRI->use_empty(Last.getOperand(0).getReg()) &&
+ assert(MRI->use_empty(LastVReg) &&
"The LEA's def register must have no uses");
Last.eraseFromParent();
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
new file mode 100644
index 000000000000..d395c826e6bf
--- /dev/null
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -0,0 +1,243 @@
+//===- X86RegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "X86RegisterBankInfo.h"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "X86GenRegisterBank.inc"
+
+using namespace llvm;
+// This file will be TableGen'ed at some point.
+#define GET_TARGET_REGBANK_INFO_IMPL
+#include "X86GenRegisterBankInfo.def"
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI)
+ : X86GenRegisterBankInfo() {
+
+ // validate RegBank initialization.
+ const RegisterBank &RBGPR = getRegBank(X86::GPRRegBankID);
+ (void)RBGPR;
+  assert(&X86::GPRRegBank == &RBGPR && "Incorrect RegBanks initialization.");
+
+ // The GPR register bank is fully defined by all the registers in
+ // GR64 + its subclasses.
+ assert(RBGPR.covers(*TRI.getRegClass(X86::GR64RegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
+}
+
+const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass(
+ const TargetRegisterClass &RC) const {
+
+ if (X86::GR8RegClass.hasSubClassEq(&RC) ||
+ X86::GR16RegClass.hasSubClassEq(&RC) ||
+ X86::GR32RegClass.hasSubClassEq(&RC) ||
+ X86::GR64RegClass.hasSubClassEq(&RC))
+ return getRegBank(X86::GPRRegBankID);
+
+ if (X86::FR32XRegClass.hasSubClassEq(&RC) ||
+ X86::FR64XRegClass.hasSubClassEq(&RC) ||
+ X86::VR128XRegClass.hasSubClassEq(&RC) ||
+ X86::VR256XRegClass.hasSubClassEq(&RC) ||
+ X86::VR512RegClass.hasSubClassEq(&RC))
+ return getRegBank(X86::VECRRegBankID);
+
+ llvm_unreachable("Unsupported register kind yet.");
+}
+
+X86GenRegisterBankInfo::PartialMappingIdx
+X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
+ if ((Ty.isScalar() && !isFP) || Ty.isPointer()) {
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ return PMI_GPR8;
+ case 16:
+ return PMI_GPR16;
+ case 32:
+ return PMI_GPR32;
+ case 64:
+ return PMI_GPR64;
+ break;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ } else if (Ty.isScalar()) {
+ switch (Ty.getSizeInBits()) {
+ case 32:
+ return PMI_FP32;
+ case 64:
+ return PMI_FP64;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ } else {
+ switch (Ty.getSizeInBits()) {
+ case 128:
+ return PMI_VEC128;
+ case 256:
+ return PMI_VEC256;
+ case 512:
+ return PMI_VEC512;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ }
+
+ return PMI_None;
+}
+
+void X86RegisterBankInfo::getInstrPartialMappingIdxs(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI, const bool isFP,
+ SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx) {
+
+ unsigned NumOperands = MI.getNumOperands();
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ auto &MO = MI.getOperand(Idx);
+ if (!MO.isReg())
+ OpRegBankIdx[Idx] = PMI_None;
+ else
+ OpRegBankIdx[Idx] = getPartialMappingIdx(MRI.getType(MO.getReg()), isFP);
+ }
+}
+
+bool X86RegisterBankInfo::getInstrValueMapping(
+ const MachineInstr &MI,
+ const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
+ SmallVectorImpl<const ValueMapping *> &OpdsMapping) {
+
+ unsigned NumOperands = MI.getNumOperands();
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ if (!MI.getOperand(Idx).isReg())
+ continue;
+
+ auto Mapping = getValueMapping(OpRegBankIdx[Idx], 1);
+ if (!Mapping->isValid())
+ return false;
+
+ OpdsMapping[Idx] = Mapping;
+ }
+ return true;
+}
+
+RegisterBankInfo::InstructionMapping
+X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumOperands = MI.getNumOperands();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ if (NumOperands != 3 || (Ty != MRI.getType(MI.getOperand(1).getReg())) ||
+ (Ty != MRI.getType(MI.getOperand(2).getReg())))
+ llvm_unreachable("Unsupported operand mapping yet.");
+
+ auto Mapping = getValueMapping(getPartialMappingIdx(Ty, isFP), 3);
+ return InstructionMapping{DefaultMappingID, 1, Mapping, NumOperands};
+}
+
+RegisterBankInfo::InstructionMapping
+X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto Opc = MI.getOpcode();
+
+ // Try the default logic for non-generic instructions that are either copies
+ // or already have some operands assigned to banks.
+ if (!isPreISelGenericOpcode(Opc)) {
+ InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ switch (Opc) {
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ return getSameOperandsMapping(MI, false);
+ break;
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ return getSameOperandsMapping(MI, true);
+ break;
+ default:
+ break;
+ }
+
+ unsigned NumOperands = MI.getNumOperands();
+
+ // Track the bank of each register, use NotFP mapping (all scalars in GPRs)
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ getInstrPartialMappingIdxs(MI, MRI, /* isFP */ false, OpRegBankIdx);
+
+ // Finally construct the computed mapping.
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
+ if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping))
+ return InstructionMapping();
+
+ return InstructionMapping{DefaultMappingID, /* Cost */ 1,
+ getOperandsMapping(OpdsMapping), NumOperands};
+}
+
+void X86RegisterBankInfo::applyMappingImpl(
+ const OperandsMapper &OpdMapper) const {
+ return applyDefaultMapping(OpdMapper);
+}
+
+RegisterBankInfo::InstructionMappings
+X86RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE: {
+    // We are going to try to map 32/64-bit to PMI_FP32/PMI_FP64.
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ if (Size != 32 && Size != 64)
+ break;
+
+ unsigned NumOperands = MI.getNumOperands();
+
+ // Track the bank of each register, use FP mapping (all scalars in VEC)
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx);
+
+ // Finally construct the computed mapping.
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
+ if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping))
+ break;
+
+ RegisterBankInfo::InstructionMapping Mapping = InstructionMapping{
+ /*ID*/ 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping), NumOperands};
+ InstructionMappings AltMappings;
+ AltMappings.emplace_back(std::move(Mapping));
+ return AltMappings;
+ }
+ default:
+ break;
+ }
+ return RegisterBankInfo::getInstrAlternativeMappings(MI);
+}
diff --git a/lib/Target/X86/X86RegisterBankInfo.h b/lib/Target/X86/X86RegisterBankInfo.h
new file mode 100644
index 000000000000..a1e01a9ab949
--- /dev/null
+++ b/lib/Target/X86/X86RegisterBankInfo.h
@@ -0,0 +1,81 @@
+//===- X86RegisterBankInfo ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "X86GenRegisterBank.inc"
+
+namespace llvm {
+
+class LLT;
+
+class X86GenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "X86GenRegisterBank.inc"
+#define GET_TARGET_REGBANK_INFO_CLASS
+#include "X86GenRegisterBankInfo.def"
+
+ static RegisterBankInfo::PartialMapping PartMappings[];
+ static RegisterBankInfo::ValueMapping ValMappings[];
+
+ static PartialMappingIdx getPartialMappingIdx(const LLT &Ty, bool isFP);
+ static const RegisterBankInfo::ValueMapping *
+ getValueMapping(PartialMappingIdx Idx, unsigned NumOperands);
+};
+
+class TargetRegisterInfo;
+
+/// This class provides the information for the target register banks.
+class X86RegisterBankInfo final : public X86GenRegisterBankInfo {
+private:
+ /// Get an instruction mapping.
+ /// \return An InstructionMappings with a statically allocated
+ /// OperandsMapping.
+ static InstructionMapping getSameOperandsMapping(const MachineInstr &MI,
+ bool isFP);
+
+ /// Track the bank of each instruction operand(register)
+ static void
+ getInstrPartialMappingIdxs(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI, const bool isFP,
+ SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx);
+
+ /// Construct the instruction ValueMapping from PartialMappingIdxs
+ /// \return true if mapping succeeded.
+ static bool
+ getInstrValueMapping(const MachineInstr &MI,
+ const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
+ SmallVectorImpl<const ValueMapping *> &OpdsMapping);
+
+public:
+ X86RegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ const RegisterBank &
+ getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+
+ InstructionMappings
+ getInstrAlternativeMappings(const MachineInstr &MI) const override;
+
+ /// See RegisterBankInfo::applyMapping.
+ void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+
+ InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+};
+
+} // namespace llvm
+#endif
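A companion sketch for the bank info (hypothetical, not part of this diff; only the constructor signature is taken from the header above):

    // Hypothetical construction during GlobalISel subtarget setup. The
    // X86GenRegisterBank.inc tables it relies on are generated from
    // X86RegisterBanks.td, added below.
    #include "X86RegisterBankInfo.h"
    #include "llvm/ADT/STLExtras.h" // llvm::make_unique
    #include "llvm/Target/TargetRegisterInfo.h"

    static std::unique_ptr<llvm::RegisterBankInfo>
    createX86RegBankInfo(const llvm::TargetRegisterInfo &TRI) {
      return llvm::make_unique<llvm::X86RegisterBankInfo>(TRI);
    }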
diff --git a/lib/Target/X86/X86RegisterBanks.td b/lib/Target/X86/X86RegisterBanks.td
new file mode 100644
index 000000000000..6d17cd53a0c1
--- /dev/null
+++ b/lib/Target/X86/X86RegisterBanks.td
@@ -0,0 +1,17 @@
+//=- X86RegisterBanks.td - Describe the X86 Register Banks -------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers: RAX, RCX,...
+def GPRRegBank : RegisterBank<"GPR", [GR64]>;
+
+/// Floating Point/Vector Registers
+def VECRRegBank : RegisterBank<"VECR", [VR512]>;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 65f438f94b04..9bab9a4cf3ba 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -80,7 +80,7 @@ X86RegisterInfo::X86RegisterInfo(const Triple &TT)
bool
X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- // ExeDepsFixer and PostRAScheduler require liveness.
+ // ExecutionDepsFixer and PostRAScheduler require liveness.
return true;
}
@@ -337,7 +337,9 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_AllRegs_AVX512_SaveList;
if (HasAVX)
return CSR_64_AllRegs_AVX_SaveList;
- return CSR_64_AllRegs_SaveList;
+ if (HasSSE)
+ return CSR_64_AllRegs_SaveList;
+ return CSR_64_AllRegs_NoSSE_SaveList;
} else {
if (HasAVX512)
return CSR_32_AllRegs_AVX512_SaveList;
@@ -447,7 +449,9 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_AllRegs_AVX512_RegMask;
if (HasAVX)
return CSR_64_AllRegs_AVX_RegMask;
- return CSR_64_AllRegs_RegMask;
+ if (HasSSE)
+ return CSR_64_AllRegs_RegMask;
+ return CSR_64_AllRegs_NoSSE_RegMask;
} else {
if (HasAVX512)
return CSR_32_AllRegs_AVX512_RegMask;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 372a15aff15a..c177ba1d52f7 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -189,22 +189,22 @@ def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
-def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[60, -2, -2]>;
-def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[61, -2, -2]>;
-def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[62, -2, -2]>;
-def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[63, -2, -2]>;
-def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[64, -2, -2]>;
-def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[65, -2, -2]>;
-def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[66, -2, -2]>;
-def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[67, -2, -2]>;
-def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[68, -2, -2]>;
-def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[69, -2, -2]>;
-def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[70, -2, -2]>;
-def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[71, -2, -2]>;
-def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[72, -2, -2]>;
-def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[73, -2, -2]>;
-def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[74, -2, -2]>;
-def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[75, -2, -2]>;
+def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
+def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
+def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
+def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
+def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
+def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
+def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
+def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
+def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
+def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
+def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
+def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
+def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
+def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
+def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
+def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
} // CostPerUse
@@ -437,8 +437,10 @@ def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
(add LOW32_ADDR_ACCESS, RBP)>;
-// A class to support the 'A' assembler constraint: EAX then EDX.
+// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
+def GR16_AD : RegisterClass<"X86", [i16], 16, (add AX, DX)>;
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
+def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 35257f89100c..7f7efd7cad3f 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -366,6 +366,7 @@ def IIC_SSE_MWAIT : InstrItinClass;
def IIC_SSE_MONITOR : InstrItinClass;
def IIC_SSE_MWAITX : InstrItinClass;
def IIC_SSE_MONITORX : InstrItinClass;
+def IIC_SSE_CLZERO : InstrItinClass;
def IIC_SSE_PREFETCH : InstrItinClass;
def IIC_SSE_PAUSE : InstrItinClass;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index f031a281e5dd..9da8a18965ea 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -85,10 +85,12 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args))
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ std::move(Args))
+ .setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 11115524c810..2cebb76022ef 100644
--- a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -14,7 +14,7 @@
#include "X86ShuffleDecodeConstantPool.h"
#include "Utils/X86ShuffleDecode.h"
-#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Constants.h"
@@ -25,7 +25,7 @@
namespace llvm {
static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
- SmallBitVector &UndefElts,
+ APInt &UndefElts,
SmallVectorImpl<uint64_t> &RawMask) {
// It is not an error for shuffle masks to not be a vector of
// MaskEltSizeInBits because the constant pool uniques constants by their
@@ -49,6 +49,33 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumCstElts = CstTy->getVectorNumElements();
+ assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
+ "Unaligned shuffle mask size");
+
+ unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
+ UndefElts = APInt(NumMaskElts, 0);
+ RawMask.resize(NumMaskElts, 0);
+
+ // Fast path - if the constants match the mask size then copy direct.
+ if (MaskEltSizeInBits == CstEltSizeInBits) {
+ assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return false;
+
+ if (isa<UndefValue>(COp)) {
+ UndefElts.setBit(i);
+ RawMask[i] = 0;
+ continue;
+ }
+
+ auto *Elt = cast<ConstantInt>(COp);
+ RawMask[i] = Elt->getValue().getZExtValue();
+ }
+ return true;
+ }
+
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(CstSizeInBits, 0);
APInt MaskBits(CstSizeInBits, 0);
@@ -57,39 +84,30 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
return false;
+ unsigned BitOffset = i * CstEltSizeInBits;
+
if (isa<UndefValue>(COp)) {
- APInt EltUndef = APInt::getLowBitsSet(CstSizeInBits, CstEltSizeInBits);
- UndefBits |= EltUndef.shl(i * CstEltSizeInBits);
+ UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
continue;
}
- APInt EltBits = cast<ConstantInt>(COp)->getValue();
- EltBits = EltBits.zextOrTrunc(CstSizeInBits);
- MaskBits |= EltBits.shl(i * CstEltSizeInBits);
+ MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
}
// Now extract the undef/constant bit data into the raw shuffle masks.
- assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
- "Unaligned shuffle mask size");
-
- unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
- UndefElts = SmallBitVector(NumMaskElts, false);
- RawMask.resize(NumMaskElts, 0);
-
for (unsigned i = 0; i != NumMaskElts; ++i) {
- APInt EltUndef = UndefBits.lshr(i * MaskEltSizeInBits);
- EltUndef = EltUndef.zextOrTrunc(MaskEltSizeInBits);
+ unsigned BitOffset = i * MaskEltSizeInBits;
+ APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
// Only treat the element as UNDEF if all bits are UNDEF, otherwise
// treat it as zero.
if (EltUndef.isAllOnesValue()) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
RawMask[i] = 0;
continue;
}
- APInt EltBits = MaskBits.lshr(i * MaskEltSizeInBits);
- EltBits = EltBits.zextOrTrunc(MaskEltSizeInBits);
+ APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
RawMask[i] = EltBits.getZExtValue();
}
@@ -104,8 +122,8 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
"Unexpected vector size.");
// The shuffle mask requires a byte vector.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 32> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 64> RawMask;
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
@@ -145,8 +163,8 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 8> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 16> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
@@ -180,7 +198,7 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
+ APInt UndefElts;
SmallVector<uint64_t, 8> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
@@ -231,8 +249,8 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
"Unexpected vector size.");
// The shuffle mask requires a byte vector.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 32> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 16> RawMask;
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
@@ -286,8 +304,8 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
"Unexpected vector element size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 8> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 64> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
@@ -314,8 +332,8 @@ void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
"Unexpected vector element size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 8> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 64> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
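
For reference, a minimal standalone sketch (not part of the patch) of the APInt bit-packing idiom the rewritten extractConstantMask relies on: constant element values are inserted into one wide APInt with insertBits and read back at the mask granularity with extractBits. The element widths and values below are purely illustrative.

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static void repackAsBytes(SmallVectorImpl<uint64_t> &RawMask) {
      // Pack two 32-bit constant elements into a single 64-bit APInt...
      APInt Bits(64, 0);
      Bits.insertBits(APInt(32, 0x01020304), 0);   // element 0 at bit offset 0
      Bits.insertBits(APInt(32, 0x05060708), 32);  // element 1 at bit offset 32

      // ...then re-read the same storage as eight 8-bit mask elements.
      for (unsigned i = 0; i != 8; ++i)
        RawMask.push_back(Bits.extractBits(8, i * 8).getZExtValue());
    }
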
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 586bb7bd7b1a..92a68759195c 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,19 +11,23 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86Subtarget.h"
-#include "X86InstrInfo.h"
#include "X86TargetMachine.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <string>
#if defined(_MSC_VER)
#include <intrin.h>
@@ -93,8 +97,17 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
return X86II::MO_NO_FLAG;
// Absolute symbols can be referenced directly.
- if (GV && GV->isAbsoluteSymbolRef())
- return X86II::MO_NO_FLAG;
+ if (GV) {
+ if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
+ // See if we can use the 8-bit immediate form. Note that some instructions
+ // will sign extend the immediate operand, so to be conservative we only
+ // accept the range [0,128).
+ if (CR->getUnsignedMax().ult(128))
+ return X86II::MO_ABS8;
+ else
+ return X86II::MO_NO_FLAG;
+ }
+ }
if (TM.shouldAssumeDSOLocal(M, GV))
return classifyLocalReference(GV);
@@ -195,7 +208,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+sahf";
}
-
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
@@ -263,7 +275,6 @@ void X86Subtarget::initializeEnvironment() {
HasVBMI = false;
HasIFMA = false;
HasRTM = false;
- HasHLE = false;
HasERI = false;
HasCDI = false;
HasPFI = false;
@@ -277,6 +288,7 @@ void X86Subtarget::initializeEnvironment() {
HasRDSEED = false;
HasLAHFSAHF = false;
HasMWAITX = false;
+ HasCLZERO = false;
HasMPX = false;
IsBTMemSlow = false;
IsPMULLDSlow = false;
@@ -286,10 +298,11 @@ void X86Subtarget::initializeEnvironment() {
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
- HasFastPartialYMMWrite = false;
+ HasFastPartialYMMorZMMWrite = false;
HasFastScalarFSQRT = false;
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
+ HasFastSHLDRotate = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
@@ -321,7 +334,7 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (!isPositionIndependent())
@@ -359,4 +372,3 @@ const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}
-
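
For context, the classifyGlobalReference hunk above consults GlobalValue::getAbsoluteSymbolRange(), which returns the ConstantRange a frontend attached to an absolute symbol (typically via !absolute_symbol metadata). A condensed sketch of the new decision, assuming GV is such a global; this restates the hunk rather than extending it:

    // Some instructions sign-extend their 8-bit immediate, so only addresses in
    // [0,128) are safe for the short form; everything else keeps the plain flag.
    if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
      return CR->getUnsignedMax().ult(128) ? X86II::MO_ABS8 : X86II::MO_NO_FLAG;
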
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d80dc4a9b5e8..d0d88d326949 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -18,33 +18,36 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
+#include <memory>
#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"
namespace llvm {
+
class GlobalValue;
-class StringRef;
-class TargetMachine;
/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {
+
enum Style {
StubPIC, // Used on i386-darwin in pic mode.
GOT, // Used on 32 bit elf on when in pic mode.
RIPRel, // Used on X86-64 when in pic mode.
None // Set when not in pic mode.
};
-}
-class X86Subtarget final : public X86GenSubtargetInfo {
+} // end namespace PICStyles
+class X86Subtarget final : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
@@ -96,10 +99,13 @@ protected:
/// Target has XSAVE instructions
bool HasXSAVE;
+
/// Target has XSAVEOPT instructions
bool HasXSAVEOPT;
+
/// Target has XSAVEC instructions
bool HasXSAVEC;
+
/// Target has XSAVES instructions
bool HasXSAVES;
@@ -148,9 +154,6 @@ protected:
/// Processor has RTM instructions.
bool HasRTM;
- /// Processor has HLE.
- bool HasHLE;
-
/// Processor has ADX instructions.
bool HasADX;
@@ -169,6 +172,9 @@ protected:
/// Processor has MONITORX/MWAITX instructions.
bool HasMWAITX;
+ /// Processor has Cache Line Zero instruction
+ bool HasCLZERO;
+
/// Processor has Prefetch with intent to Write instruction
bool HasPFPREFETCHWT1;
@@ -201,8 +207,8 @@ protected:
bool UseLeaForSP;
/// True if there is no performance penalty to writing only the lower parts
- /// of a YMM register without clearing the upper part.
- bool HasFastPartialYMMWrite;
+ /// of a YMM or ZMM register without clearing the upper part.
+ bool HasFastPartialYMMorZMMWrite;
/// True if hardware SQRTSS instruction is at least as fast (latency) as
/// RSQRTSS followed by a Newton-Raphson iteration.
@@ -223,6 +229,9 @@ protected:
/// True if LZCNT instruction is fast.
bool HasFastLZCNT;
+ /// True if SHLD based rotate is fast.
+ bool HasFastSHLDRotate;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
@@ -265,24 +274,12 @@ protected:
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
- /// Processor supports Invalidate Process-Context Identifier
- bool HasInvPCId;
-
- /// Processor has VM Functions
- bool HasVMFUNC;
-
- /// Processor has Supervisor Mode Access Protection
- bool HasSMAP;
-
/// Processor has Software Guard Extensions
bool HasSGX;
/// Processor supports Flush Cache Line instruction
bool HasCLFLUSHOPT;
- /// Processor has Persistent Commit feature
- bool HasPCOMMIT;
-
/// Processor supports Cache Line Write Back instruction
bool HasCLWB;
@@ -307,8 +304,8 @@ protected:
/// This is used to avoid ifndefs spreading around while GISel is
/// an optional library.
std::unique_ptr<GISelAccessor> GISel;
-private:
+private:
/// Override the stack alignment.
unsigned StackAlignOverride;
@@ -341,13 +338,17 @@ public:
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+
const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
const X86FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
+
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
+
const X86RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
@@ -370,12 +371,14 @@ public:
const InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
+
private:
/// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
+
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
@@ -432,9 +435,9 @@ public:
bool hasPCLMUL() const { return HasPCLMUL; }
  // Prefer FMA4 to FMA - it's better for commutation/memory folding and
// has equal or better performance on all supported targets.
- bool hasFMA() const { return HasFMA && !HasFMA4; }
+ bool hasFMA() const { return (HasFMA || hasAVX512()) && !HasFMA4; }
bool hasFMA4() const { return HasFMA4; }
- bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
+ bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
bool hasXOP() const { return HasXOP; }
bool hasTBM() const { return HasTBM; }
bool hasMOVBE() const { return HasMOVBE; }
@@ -447,13 +450,13 @@ public:
bool hasVBMI() const { return HasVBMI; }
bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; }
- bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool hasMWAITX() const { return HasMWAITX; }
+ bool hasCLZERO() const { return HasCLZERO; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isPMULLDSlow() const { return IsPMULLDSlow; }
@@ -462,10 +465,13 @@ public:
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
- bool hasFastPartialYMMWrite() const { return HasFastPartialYMMWrite; }
+ bool hasFastPartialYMMorZMMWrite() const {
+ return HasFastPartialYMMorZMMWrite;
+ }
bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
+ bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
@@ -481,8 +487,9 @@ public:
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
bool hasMPX() const { return HasMPX; }
+ bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
- virtual bool isXRaySupported() const override { return is64Bit(); }
+ bool isXRaySupported() const override { return is64Bit(); }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
@@ -513,6 +520,7 @@ public:
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetWindowsMSVC() const {
return TargetTriple.isWindowsMSVCEnvironment();
@@ -616,6 +624,9 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
+ // TODO: Update the regression tests and return true.
+ bool supportPrintSchedInfo() const override { return false; }
+
bool enableEarlyIfConversion() const override;
/// Return the instruction itineraries based on the subtarget selection.
@@ -628,6 +639,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index aa5cfc64e9eb..03a1958121ab 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,22 +11,47 @@
//
//===----------------------------------------------------------------------===//
-#include "X86TargetMachine.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86CallLowering.h"
+#include "X86LegalizerInfo.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "X86RegisterBankInfo.h"
+#endif
+#include "X86MacroFusion.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "X86TargetTransformInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
@@ -34,8 +59,11 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::init(true), cl::Hidden);
namespace llvm {
+
void initializeWinEHStatePassPass(PassRegistry &);
-}
+void initializeX86ExecutionDepsFixPass(PassRegistry &);
+
+} // end namespace llvm
extern "C" void LLVMInitializeX86Target() {
// Register the target.
@@ -47,27 +75,28 @@ extern "C" void LLVMInitializeX86Target() {
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
initializeEvexToVexInstPassPass(PR);
+ initializeX86ExecutionDepsFixPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO()) {
if (TT.getArch() == Triple::x86_64)
- return make_unique<X86_64MachoTargetObjectFile>();
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<X86_64MachoTargetObjectFile>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
}
if (TT.isOSFreeBSD())
- return make_unique<X86FreeBSDTargetObjectFile>();
+ return llvm::make_unique<X86FreeBSDTargetObjectFile>();
if (TT.isOSLinux() || TT.isOSNaCl())
- return make_unique<X86LinuxNaClTargetObjectFile>();
+ return llvm::make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSFuchsia())
- return make_unique<X86FuchsiaTargetObjectFile>();
+ return llvm::make_unique<X86FuchsiaTargetObjectFile>();
if (TT.isOSBinFormatELF())
- return make_unique<X86ELFTargetObjectFile>();
+ return llvm::make_unique<X86ELFTargetObjectFile>();
if (TT.isKnownWindowsMSVCEnvironment() || TT.isWindowsCoreCLREnvironment())
- return make_unique<X86WindowsTargetObjectFile>();
+ return llvm::make_unique<X86WindowsTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
- return make_unique<TargetLoweringObjectFileCOFF>();
+ return llvm::make_unique<TargetLoweringObjectFileCOFF>();
llvm_unreachable("unknown subtarget type");
}
@@ -177,31 +206,37 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-X86TargetMachine::~X86TargetMachine() {}
+X86TargetMachine::~X86TargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct X86GISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CL;
- X86GISelActualAccessor(CallLowering* CL): CL(CL) {}
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+
const CallLowering *getCallLowering() const override {
- return CL.get();
+ return CallLoweringInfo.get();
}
+
const InstructionSelector *getInstructionSelector() const override {
- //TODO: Implement
- return nullptr;
+ return InstSelector.get();
}
+
const LegalizerInfo *getLegalizerInfo() const override {
- //TODO: Implement
- return nullptr;
+ return Legalizer.get();
}
+
const RegisterBankInfo *getRegBankInfo() const override {
- //TODO: Implement
- return nullptr;
+ return RegBankInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
+
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -244,8 +279,14 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *GISel = new GISelAccessor();
#else
- X86GISelActualAccessor *GISel = new X86GISelActualAccessor(
- new X86CallLowering(*I->getTargetLowering()));
+ X86GISelActualAccessor *GISel = new X86GISelActualAccessor();
+
+ GISel->CallLoweringInfo.reset(new X86CallLowering(*I->getTargetLowering()));
+ GISel->Legalizer.reset(new X86LegalizerInfo(*I, *this));
+
+ auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo());
+ GISel->RegBankInfo.reset(RBI);
+ GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI));
#endif
I->setGISelAccessor(*GISel);
}
@@ -270,12 +311,12 @@ TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() {
});
}
-
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
namespace {
+
/// X86 Code Generator Pass Configuration Options.
class X86PassConfig : public TargetPassConfig {
public:
@@ -289,7 +330,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
- DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ DAG->addMutation(createX86MacroFusionDAGMutation());
return DAG;
}
@@ -301,14 +342,28 @@ public:
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
#endif
-bool addILPOpts() override;
+ bool addILPOpts() override;
bool addPreISel() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addPreEmitPass() override;
void addPreSched2() override;
};
-} // namespace
+
+class X86ExecutionDepsFix : public ExecutionDepsFix {
+public:
+ static char ID;
+ X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {}
+ StringRef getPassName() const override {
+ return "X86 Execution Dependency Fix";
+ }
+};
+char X86ExecutionDepsFix::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix",
+ "X86 Execution Dependency Fix", false, false)
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
return new X86PassConfig(this, PM);
@@ -343,17 +398,17 @@ bool X86PassConfig::addIRTranslator() {
}
bool X86PassConfig::addLegalizeMachineIR() {
- //TODO: Implement
+ addPass(new Legalizer());
return false;
}
bool X86PassConfig::addRegBankSelect() {
- //TODO: Implement
+ addPass(new RegBankSelect());
return false;
}
bool X86PassConfig::addGlobalInstructionSelect() {
- //TODO: Implement
+ addPass(new InstructionSelect());
return false;
}
#endif
@@ -391,7 +446,7 @@ void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
void X86PassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
- addPass(createExecutionDependencyFixPass(&X86::VR128XRegClass));
+ addPass(new X86ExecutionDepsFix());
if (UseVZeroUpper)
addPass(createX86IssueVZeroUpperPass());
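
Gathering the X86TargetMachine.cpp hunks above into one place, the new execution-dependency pass follows the standard machine-pass boilerplate: a thin subclass carrying its own pass ID, an INITIALIZE_PASS registration, an initialize call at target init, and a late addPass. This is a condensed restatement of code already in the diff, not new functionality:

    namespace {
    class X86ExecutionDepsFix : public ExecutionDepsFix {
    public:
      static char ID;
      X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {}
      StringRef getPassName() const override {
        return "X86 Execution Dependency Fix";
      }
    };
    char X86ExecutionDepsFix::ID;
    } // end anonymous namespace

    INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix",
                    "X86 Execution Dependency Fix", false, false)

    // LLVMInitializeX86Target():       initializeX86ExecutionDepsFixPass(PR);
    // X86PassConfig::addPreEmitPass(): addPass(new X86ExecutionDepsFix());
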
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index d756d07926dd..cf933f52604e 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -13,14 +13,20 @@
#ifndef LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
#define LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
-#include "X86InstrInfo.h"
+
#include "X86Subtarget.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
class StringRef;
+class X86Subtarget;
+class X86RegisterBankInfo;
class X86TargetMachine final : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
@@ -32,17 +38,19 @@ public:
Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~X86TargetMachine() override;
+
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
TargetIRAnalysis getTargetIRAnalysis() override;
// Set up the pass pipeline.
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 5715d826862e..b742fb472372 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -78,7 +78,7 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
return 8;
}
-unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasAVX512())
return 512;
@@ -95,6 +95,10 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
return 32;
}
+unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
+ return getRegisterBitWidth(true);
+}
+
unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
// If the loop will not be vectorized, don't interleave the loop.
// Let regular unroll to unroll the loop, which saves the overflow
@@ -114,7 +118,7 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
}
int X86TTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
+ unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
@@ -207,6 +211,10 @@ int X86TTIImpl::getArithmeticInstrCost(
}
static const CostTblEntry AVX512UniformConstCostTable[] = {
+ { ISD::SRA, MVT::v2i64, 1 },
+ { ISD::SRA, MVT::v4i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+
{ ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
{ ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
};
@@ -319,6 +327,14 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWCostTable[] = {
+ { ISD::SHL, MVT::v8i16, 1 }, // vpsllvw
+ { ISD::SRL, MVT::v8i16, 1 }, // vpsrlvw
+ { ISD::SRA, MVT::v8i16, 1 }, // vpsravw
+
+ { ISD::SHL, MVT::v16i16, 1 }, // vpsllvw
+ { ISD::SRL, MVT::v16i16, 1 }, // vpsrlvw
+ { ISD::SRA, MVT::v16i16, 1 }, // vpsravw
+
{ ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
{ ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
{ ISD::SRA, MVT::v32i16, 1 }, // vpsravw
@@ -347,8 +363,12 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v16i32, 1 },
{ ISD::SRL, MVT::v16i32, 1 },
{ ISD::SRA, MVT::v16i32, 1 },
+
{ ISD::SHL, MVT::v8i64, 1 },
{ ISD::SRL, MVT::v8i64, 1 },
+
+ { ISD::SRA, MVT::v2i64, 1 },
+ { ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
{ ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
@@ -595,7 +615,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
- { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
@@ -804,7 +823,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
- { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
+ { TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb
+
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2 * vpshufb
+ // + vpblendvb
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 } // vperm2i128 + 2 * vpshufb
+ // + vpblendvb
};
if (ST->hasAVX2())
@@ -861,7 +887,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 }, // pshufb + pshufb + por
+
+ { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 } // pshufb
};
if (ST->hasSSSE3())
@@ -886,7 +915,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por
+
+ { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 } // pshufd
};
if (ST->hasSSE2())
@@ -906,7 +938,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -1272,7 +1305,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -1338,11 +1372,12 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
// Costs should match the codegen from:
// BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
// BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
@@ -1418,8 +1453,8 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
};
static const CostTblEntry SSE42CostTbl[] = {
- { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
static const CostTblEntry SSSE3CostTbl[] = {
{ ISD::BITREVERSE, MVT::v2i64, 5 },
@@ -1443,6 +1478,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v16i8, 9 }
};
static const CostTblEntry SSE2CostTbl[] = {
+ { ISD::BITREVERSE, MVT::v2i64, 29 },
+ { ISD::BITREVERSE, MVT::v4i32, 27 },
+ { ISD::BITREVERSE, MVT::v8i16, 27 },
+ { ISD::BITREVERSE, MVT::v16i8, 20 },
{ ISD::BSWAP, MVT::v2i64, 7 },
{ ISD::BSWAP, MVT::v4i32, 7 },
{ ISD::BSWAP, MVT::v8i16, 7 },
@@ -1462,8 +1501,16 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
};
static const CostTblEntry SSE1CostTbl[] = {
- { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
+ };
+ static const CostTblEntry X64CostTbl[] = { // 64-bit targets
+ { ISD::BITREVERSE, MVT::i64, 14 }
+ };
+ static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+ { ISD::BITREVERSE, MVT::i32, 14 },
+ { ISD::BITREVERSE, MVT::i16, 14 },
+ { ISD::BITREVERSE, MVT::i8, 11 }
};
unsigned ISD = ISD::DELETED_NODE;
@@ -1523,12 +1570,19 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
}
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -1562,22 +1616,8 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
-int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- int Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
@@ -2132,7 +2172,7 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
// TODO: We expect this to be beneficial regardless of arch,
// but there are currently some unexplained performance artifacts on Atom.
// As a temporary solution, disable on Atom.
- return !(ST->isAtom() || ST->isSLM());
+ return !(ST->isAtom());
}
// Get estimation for interleaved load/store operations and strided load.
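
All of the cost-table additions above plug into the same lookup pattern that getArithmeticInstrCost and getIntrinsicInstrCost already use: legalize the IR type, probe a table keyed by (ISD opcode, legalized MVT), and scale the matching entry by the legalization factor. A small illustrative sketch, with a made-up table, using the ST/TLI/DL members of the surrounding class:

    static const CostTblEntry ExampleCostTbl[] = {
      { ISD::SRA, MVT::v8i64, 1 },  // e.g. one arithmetic shift on AVX-512
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
    if (ST->hasAVX512())
      if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISD::SRA, LT.second))
        return LT.first * Entry->Cost;
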
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index ecaaf951cff7..9bef9e80c395 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -33,8 +33,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
const X86Subtarget *ST;
const X86TargetLowering *TLI;
- int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
@@ -53,7 +51,8 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
@@ -63,11 +62,13 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
@@ -76,9 +77,11 @@ public:
const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 9766b84be652..d17dfac6a997 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -56,11 +56,11 @@ namespace {
// Core algorithm state:
// BlockState - Each block is either:
- // - PASS_THROUGH: There are neither YMM dirtying instructions nor
+ // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
// vzeroupper instructions in this block.
// - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
- // block that will ensure that YMM is clean on exit.
- // - EXITS_DIRTY: An instruction in the block dirties YMM and no
+ // block that will ensure that YMM/ZMM is clean on exit.
+ // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
// subsequent vzeroupper in the block clears it.
//
// AddedToDirtySuccessors - This flag is raised when a block is added to the
@@ -97,6 +97,7 @@ FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
}
+#ifndef NDEBUG
const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
switch (ST) {
case PASS_THROUGH: return "Pass-through";
@@ -105,52 +106,56 @@ const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
}
llvm_unreachable("Invalid block exit state.");
}
+#endif
-static bool isYmmReg(unsigned Reg) {
- return (Reg >= X86::YMM0 && Reg <= X86::YMM15);
+/// VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
+/// Thus, there is no need to check for Y/ZMM16 and above.
+static bool isYmmOrZmmReg(unsigned Reg) {
+ return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
+ (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
}
-static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
+static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) {
for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
E = MRI.livein_end(); I != E; ++I)
- if (isYmmReg(I->first))
+ if (isYmmOrZmmReg(I->first))
return true;
return false;
}
-static bool clobbersAllYmmRegs(const MachineOperand &MO) {
+static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) {
for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
if (!MO.clobbersPhysReg(reg))
return false;
}
+ for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
+ if (!MO.clobbersPhysReg(reg))
+ return false;
+ }
return true;
}
-static bool hasYmmReg(MachineInstr &MI) {
+static bool hasYmmOrZmmReg(MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
- if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
+ if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO))
return true;
if (!MO.isReg())
continue;
if (MO.isDebug())
continue;
- if (isYmmReg(MO.getReg()))
+ if (isYmmOrZmmReg(MO.getReg()))
return true;
}
return false;
}
-/// Check if any YMM register will be clobbered by this instruction.
-static bool callClobbersAnyYmmReg(MachineInstr &MI) {
+/// Check if given call instruction has a RegMask operand.
+static bool callHasRegMask(MachineInstr &MI) {
assert(MI.isCall() && "Can only be called on call instructions.");
for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
- if (MO.clobbersPhysReg(reg))
- return true;
- }
+ if (MO.isRegMask())
+ return true;
}
return false;
}
@@ -175,17 +180,20 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
-
// Start by assuming that the block is PASS_THROUGH which implies no unguarded
// calls.
BlockExitState CurState = PASS_THROUGH;
BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
for (MachineInstr &MI : MBB) {
+ bool IsCall = MI.isCall();
+ bool IsReturn = MI.isReturn();
+ bool IsControlFlow = IsCall || IsReturn;
+
// No need for vzeroupper before iret in interrupt handler function,
- // epilogue will restore YMM registers if needed.
- bool IsReturnFromX86INTR = IsX86INTR && MI.isReturn();
- bool IsControlFlow = MI.isCall() || MI.isReturn();
+ // since the epilogue will restore YMM/ZMM registers if needed.
+ if (IsX86INTR && IsReturn)
+ continue;
// An existing VZERO* instruction resets the state.
if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
@@ -194,30 +202,30 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
}
// Shortcut: don't need to check regular instructions in dirty state.
- if ((!IsControlFlow || IsReturnFromX86INTR) && CurState == EXITS_DIRTY)
+ if (!IsControlFlow && CurState == EXITS_DIRTY)
continue;
- if (hasYmmReg(MI)) {
- // We found a ymm-using instruction; this could be an AVX instruction,
- // or it could be control flow.
+ if (hasYmmOrZmmReg(MI)) {
+ // We found a ymm/zmm-using instruction; this could be an AVX/AVX512
+ // instruction, or it could be control flow.
CurState = EXITS_DIRTY;
continue;
}
// Check for control-flow out of the current function (which might
// indirectly execute SSE instructions).
- if (!IsControlFlow || IsReturnFromX86INTR)
+ if (!IsControlFlow)
continue;
- // If the call won't clobber any YMM register, skip it as well. It usually
- // happens on helper function calls (such as '_chkstk', '_ftol2') where
- // standard calling convention is not used (RegMask is not used to mark
- // register clobbered and register usage (def/imp-def/use) is well-defined
- // and explicitly specified.
- if (MI.isCall() && !callClobbersAnyYmmReg(MI))
+ // If the call has no RegMask, skip it as well. It usually happens on
+ // helper function calls (such as '_chkstk', '_ftol2') where the standard
+ // calling convention is not used: RegMask is not used to mark the
+ // clobbered registers, and register usage (def/imp-def/use) is well-defined
+ // and explicitly specified.
+ if (IsCall && !callHasRegMask(MI))
continue;
- // The VZEROUPPER instruction resets the upper 128 bits of all AVX
+ // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15
// registers. In addition, the processor changes back to Clean state, after
// which execution of SSE instructions or AVX instructions has no transition
// penalty. Add the VZEROUPPER instruction before any function call/return
@@ -226,7 +234,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// predecessor block.
if (CurState == EXITS_DIRTY) {
// After the inserted VZEROUPPER the state becomes clean again, but
- // other YMM may appear before other subsequent calls or even before
+ // other YMM/ZMM uses may appear before subsequent calls or even before
// the end of the BB.
insertVZeroUpper(MI, MBB);
CurState = EXITS_CLEAN;
@@ -257,30 +265,32 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
/// function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
+ if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite())
return false;
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
IsX86INTR = MF.getFunction()->getCallingConv() == CallingConv::X86_INTR;
- bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
-
- // Fast check: if the function doesn't use any ymm registers, we don't need
- // to insert any VZEROUPPER instructions. This is constant-time, so it is
- // cheap in the common case of no ymm use.
- bool YMMUsed = FnHasLiveInYmm;
- if (!YMMUsed) {
- const TargetRegisterClass *RC = &X86::VR256RegClass;
- for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
- i++) {
- if (!MRI.reg_nodbg_empty(*i)) {
- YMMUsed = true;
- break;
+ bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
+
+ // Fast check: if the function doesn't use any ymm/zmm registers, we don't
+ // need to insert any VZEROUPPER instructions. This is constant-time, so it
+ // is cheap in the common case of no ymm/zmm use.
+ bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
+ const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass};
+ for (auto *RC : RCs) {
+ if (!YmmOrZmmUsed) {
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
+ i++) {
+ if (!MRI.reg_nodbg_empty(*i)) {
+ YmmOrZmmUsed = true;
+ break;
+ }
}
}
}
- if (!YMMUsed) {
+ if (!YmmOrZmmUsed) {
return false;
}
@@ -294,9 +304,9 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
processBasicBlock(MBB);
- // If any YMM regs are live-in to this function, add the entry block to the
- // DirtySuccessors list
- if (FnHasLiveInYmm)
+ // If any YMM/ZMM regs are live-in to this function, add the entry block to
+ // the DirtySuccessors list
+ if (FnHasLiveInYmmOrZmm)
addDirtySuccessor(MF.front());
// Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
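
As a compact restatement (not part of the patch) of the PASS_THROUGH / EXITS_CLEAN / EXITS_DIRTY scan that processBasicBlock implements, the per-instruction transitions look roughly like the following; this deliberately omits the interrupt-handler, RegMask-less helper call, and unguarded-call bookkeeping handled in the real code:

    BlockExitState CurState = PASS_THROUGH;
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER)
        CurState = EXITS_CLEAN;            // an explicit VZERO* resets the state
      else if (hasYmmOrZmmReg(MI))
        CurState = EXITS_DIRTY;            // YMM/ZMM use dirties the upper halves
      else if ((MI.isCall() || MI.isReturn()) && CurState == EXITS_DIRTY) {
        insertVZeroUpper(MI, MBB);         // guard the call/return out of dirty state
        CurState = EXITS_CLEAN;
      }
    }
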
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 500c84d2a418..b03c1852281d 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -12,13 +12,15 @@
//===----------------------------------------------------------------------===//
#include "XCoreInstPrinter.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index dc513f7b225b..8a7efe2e39c6 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -15,6 +15,8 @@
#ifndef LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
#define LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -32,12 +34,14 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
+
private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index c5859b7786f7..5fc58d831319 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -11,15 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreMCTargetDesc.h"
#include "InstPrinter/XCoreInstPrinter.h"
-#include "XCoreMCAsmInfo.h"
+#include "MCTargetDesc/XCoreMCAsmInfo.h"
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "XCoreTargetStreamer.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -79,20 +83,25 @@ static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T,
}
XCoreTargetStreamer::XCoreTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-XCoreTargetStreamer::~XCoreTargetStreamer() {}
+
+XCoreTargetStreamer::~XCoreTargetStreamer() = default;
namespace {
class XCoreTargetAsmStreamer : public XCoreTargetStreamer {
formatted_raw_ostream &OS;
+
public:
XCoreTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
void emitCCTopData(StringRef Name) override;
void emitCCTopFunction(StringRef Name) override;
void emitCCBottomData(StringRef Name) override;
void emitCCBottomFunction(StringRef Name) override;
};
+} // end anonymous namespace
+
XCoreTargetAsmStreamer::XCoreTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: XCoreTargetStreamer(S), OS(OS) {}
@@ -112,7 +121,6 @@ void XCoreTargetAsmStreamer::emitCCBottomData(StringRef Name) {
void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
OS << "\t.cc_bottom " << Name << ".function\n";
}
-}
static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index ac0f3fefbae7..1dc384fadf69 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -14,13 +14,13 @@
#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
-#include "llvm/Support/DataTypes.h"
-
namespace llvm {
+
class Target;
+
Target &getTheXCoreTarget();
-} // End llvm namespace
+} // end namespace llvm
// Defines symbolic names for XCore registers. This defines a mapping from
// register name to register number.
@@ -36,4 +36,4 @@ Target &getTheXCoreTarget();
#define GET_SUBTARGETINFO_ENUM
#include "XCoreGenSubtargetInfo.inc"
-#endif
+#endif // LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index e0e2e0319964..a752357400b3 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -238,7 +238,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
report_fatal_error("emitPrologue unsupported alignment: "
+ Twine(MFI.getMaxAlignment()));
- const AttributeSet &PAL = MF.getFunction()->getAttributes();
+ const AttributeList &PAL = MF.getFunction()->getAttributes();
if (PAL.hasAttrSomewhere(Attribute::Nest))
BuildMI(MBB, MBBI, dl, TII.get(XCore::LDWSP_ru6), XCore::R11).addImm(0);
// FIX: Needs addMemOperand() but can't use getFixedStack() or getStack().
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9244d594460f..45437815fa37 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -483,7 +483,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL).setChain(Chain).setCallee(
+ CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
CallingConv::C, IntPtrTy,
DAG.getExternalSymbol("__misaligned_load",
getPointerTy(DAG.getDataLayout())),
@@ -1824,6 +1824,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 41813bbb8156..188f4f1fa06b 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -202,6 +202,7 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index cdcc52fdc32d..cf469ec3cf1a 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -14,50 +14,39 @@
#ifndef LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include <cassert>
+#include <utility>
#include <vector>
namespace llvm {
-// Forward declarations
-class Function;
-
/// XCoreFunctionInfo - This class is derived from MachineFunction private
/// XCore target-specific information for each MachineFunction.
class XCoreFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
- bool LRSpillSlotSet;
+ bool LRSpillSlotSet = false;
int LRSpillSlot;
- bool FPSpillSlotSet;
+ bool FPSpillSlotSet = false;
int FPSpillSlot;
- bool EHSpillSlotSet;
+ bool EHSpillSlotSet = false;
int EHSpillSlot[2];
unsigned ReturnStackOffset;
- bool ReturnStackOffsetSet;
- int VarArgsFrameIndex;
- mutable int CachedEStackSize;
+ bool ReturnStackOffsetSet = false;
+ int VarArgsFrameIndex = 0;
+ mutable int CachedEStackSize = -1;
std::vector<std::pair<MachineBasicBlock::iterator, CalleeSavedInfo>>
SpillLabels;
+ virtual void anchor();
+
public:
- XCoreFunctionInfo() :
- LRSpillSlotSet(false),
- FPSpillSlotSet(false),
- EHSpillSlotSet(false),
- ReturnStackOffsetSet(false),
- VarArgsFrameIndex(0),
- CachedEStackSize(-1) {}
+ XCoreFunctionInfo() = default;
- explicit XCoreFunctionInfo(MachineFunction &MF) :
- LRSpillSlotSet(false),
- FPSpillSlotSet(false),
- EHSpillSlotSet(false),
- ReturnStackOffsetSet(false),
- VarArgsFrameIndex(0),
- CachedEStackSize(-1) {}
+ explicit XCoreFunctionInfo(MachineFunction &MF) {}
- ~XCoreFunctionInfo() {}
+ ~XCoreFunctionInfo() override = default;
void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
@@ -101,6 +90,7 @@ public:
return SpillLabels;
}
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index c03b0afceba3..646309e02de8 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -35,11 +35,11 @@ SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__memcpy_4",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ "__memcpy_4", TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index bf3138f2164a..e28e05c7f6a8 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -10,15 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "XCore.h"
#include "XCoreTargetMachine.h"
#include "XCoreTargetObjectFile.h"
#include "XCoreTargetTransformInfo.h"
-#include "XCore.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetRegistry.h"
+
using namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
@@ -38,14 +42,15 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM, OL),
- TLOF(make_unique<XCoreTargetObjectFile>()),
+ TLOF(llvm::make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
-XCoreTargetMachine::~XCoreTargetMachine() {}
+XCoreTargetMachine::~XCoreTargetMachine() = default;
namespace {
+
/// XCore Code Generator Pass Configuration Options.
class XCorePassConfig : public TargetPassConfig {
public:
@@ -61,7 +66,8 @@ public:
bool addInstSelector() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
return new XCorePassConfig(this, PM);
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 4bd25bc8776c..2b53f01a996d 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,13 +15,19 @@
#define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
#include "XCoreSubtarget.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
XCoreSubtarget Subtarget;
+
public:
XCoreTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -38,6 +44,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
@@ -45,4 +52,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H